[llvm] 1d27ad2 - [AArch64] Add tablegen patterns for fp16 fcvtn2.

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 20 06:10:30 PDT 2023


Author: David Green
Date: 2023-06-20T14:10:25+01:00
New Revision: 1d27ad20774435bb97760df124fba99d4c780ccb

URL: https://github.com/llvm/llvm-project/commit/1d27ad20774435bb97760df124fba99d4c780ccb
DIFF: https://github.com/llvm/llvm-project/commit/1d27ad20774435bb97760df124fba99d4c780ccb.diff

LOG: [AArch64] Add tablegen patterns for fp16 fcvtn2.

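Similar to the existing f32 pattern, this adds a tablegen pattern for the fp16
fcvtn2: a concat_vectors whose second (high) operand is a v4f32 -> v4f16
fpround is now selected as FCVTNv8i16 (fcvtn2), instead of a separate fcvtn
followed by a lane mov into the upper half.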

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/fp16-v16-instructions.ll
    llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
    llvm/test/CodeGen/AArch64/vector-fcopysign.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index e6af51629c5a0..daf8aed458096 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4673,10 +4673,14 @@ def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
 def : Pat<(concat_vectors V64:$Rd,
                           (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
           (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
-def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
-def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>;
+def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))),
+          (FCVTNv2i32 V128:$Rn)>;
+def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))),
+          (FCVTNv4i16 V128:$Rn)>;
 def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))),
           (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+def : Pat<(concat_vectors V64:$Rd, (v4f16 (any_fpround (v4f32 V128:$Rn)))),
+          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
 defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
 defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
 defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",

diff  --git a/llvm/test/CodeGen/AArch64/fp16-v16-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v16-instructions.ll
index af3b52f66de77..74552735179c3 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v16-instructions.ll
@@ -5,16 +5,14 @@
 define <16 x half> @sitofp_i32(<16 x i32> %a) #0 {
 ; CHECK-LABEL: sitofp_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    scvtf v1.4s, v1.4s
-; CHECK-NEXT:    scvtf v0.4s, v0.4s
-; CHECK-NEXT:    scvtf v3.4s, v3.4s
 ; CHECK-NEXT:    scvtf v2.4s, v2.4s
-; CHECK-NEXT:    fcvtn v4.4h, v1.4s
+; CHECK-NEXT:    scvtf v0.4s, v0.4s
+; CHECK-NEXT:    scvtf v4.4s, v1.4s
 ; CHECK-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-NEXT:    fcvtn v3.4h, v3.4s
 ; CHECK-NEXT:    fcvtn v1.4h, v2.4s
-; CHECK-NEXT:    mov v0.d[1], v4.d[0]
-; CHECK-NEXT:    mov v1.d[1], v3.d[0]
+; CHECK-NEXT:    scvtf v2.4s, v3.4s
+; CHECK-NEXT:    fcvtn2 v0.8h, v4.4s
+; CHECK-NEXT:    fcvtn2 v1.8h, v2.4s
 ; CHECK-NEXT:    ret
 
   %1 = sitofp <16 x i32> %a to <16 x half>
@@ -25,28 +23,26 @@ define <16 x half> @sitofp_i32(<16 x i32> %a) #0 {
 define <16 x half> @sitofp_i64(<16 x i64> %a) #0 {
 ; CHECK-LABEL: sitofp_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    scvtf v2.2d, v2.2d
 ; CHECK-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-NEXT:    scvtf v6.2d, v6.2d
 ; CHECK-NEXT:    scvtf v4.2d, v4.2d
-; CHECK-NEXT:    scvtf v3.2d, v3.2d
+; CHECK-NEXT:    scvtf v2.2d, v2.2d
 ; CHECK-NEXT:    scvtf v1.2d, v1.2d
-; CHECK-NEXT:    scvtf v7.2d, v7.2d
+; CHECK-NEXT:    scvtf v6.2d, v6.2d
 ; CHECK-NEXT:    scvtf v5.2d, v5.2d
-; CHECK-NEXT:    fcvtn v2.2s, v2.2d
+; CHECK-NEXT:    scvtf v3.2d, v3.2d
+; CHECK-NEXT:    scvtf v7.2d, v7.2d
 ; CHECK-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-NEXT:    fcvtn v6.2s, v6.2d
 ; CHECK-NEXT:    fcvtn v4.2s, v4.2d
-; CHECK-NEXT:    fcvtn2 v2.4s, v3.2d
+; CHECK-NEXT:    fcvtn v2.2s, v2.2d
+; CHECK-NEXT:    fcvtn v6.2s, v6.2d
 ; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT:    fcvtn2 v6.4s, v7.2d
 ; CHECK-NEXT:    fcvtn2 v4.4s, v5.2d
-; CHECK-NEXT:    fcvtn v2.4h, v2.4s
+; CHECK-NEXT:    fcvtn2 v2.4s, v3.2d
+; CHECK-NEXT:    fcvtn2 v6.4s, v7.2d
 ; CHECK-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-NEXT:    fcvtn v3.4h, v6.4s
 ; CHECK-NEXT:    fcvtn v1.4h, v4.4s
-; CHECK-NEXT:    mov v0.d[1], v2.d[0]
-; CHECK-NEXT:    mov v1.d[1], v3.d[0]
+; CHECK-NEXT:    fcvtn2 v0.8h, v2.4s
+; CHECK-NEXT:    fcvtn2 v1.8h, v6.4s
 ; CHECK-NEXT:    ret
 
 
@@ -60,16 +56,14 @@ define <16 x half> @sitofp_i64(<16 x i64> %a) #0 {
 define <16 x half> @uitofp_i32(<16 x i32> %a) #0 {
 ; CHECK-LABEL: uitofp_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ucvtf v1.4s, v1.4s
-; CHECK-NEXT:    ucvtf v0.4s, v0.4s
-; CHECK-NEXT:    ucvtf v3.4s, v3.4s
 ; CHECK-NEXT:    ucvtf v2.4s, v2.4s
-; CHECK-NEXT:    fcvtn v4.4h, v1.4s
+; CHECK-NEXT:    ucvtf v0.4s, v0.4s
+; CHECK-NEXT:    ucvtf v4.4s, v1.4s
 ; CHECK-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-NEXT:    fcvtn v3.4h, v3.4s
 ; CHECK-NEXT:    fcvtn v1.4h, v2.4s
-; CHECK-NEXT:    mov v0.d[1], v4.d[0]
-; CHECK-NEXT:    mov v1.d[1], v3.d[0]
+; CHECK-NEXT:    ucvtf v2.4s, v3.4s
+; CHECK-NEXT:    fcvtn2 v0.8h, v4.4s
+; CHECK-NEXT:    fcvtn2 v1.8h, v2.4s
 ; CHECK-NEXT:    ret
 
   %1 = uitofp <16 x i32> %a to <16 x half>
@@ -80,28 +74,26 @@ define <16 x half> @uitofp_i32(<16 x i32> %a) #0 {
 define <16 x half> @uitofp_i64(<16 x i64> %a) #0 {
 ; CHECK-LABEL: uitofp_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ucvtf v2.2d, v2.2d
 ; CHECK-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-NEXT:    ucvtf v6.2d, v6.2d
 ; CHECK-NEXT:    ucvtf v4.2d, v4.2d
-; CHECK-NEXT:    ucvtf v3.2d, v3.2d
+; CHECK-NEXT:    ucvtf v2.2d, v2.2d
 ; CHECK-NEXT:    ucvtf v1.2d, v1.2d
-; CHECK-NEXT:    ucvtf v7.2d, v7.2d
+; CHECK-NEXT:    ucvtf v6.2d, v6.2d
 ; CHECK-NEXT:    ucvtf v5.2d, v5.2d
-; CHECK-NEXT:    fcvtn v2.2s, v2.2d
+; CHECK-NEXT:    ucvtf v3.2d, v3.2d
+; CHECK-NEXT:    ucvtf v7.2d, v7.2d
 ; CHECK-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-NEXT:    fcvtn v6.2s, v6.2d
 ; CHECK-NEXT:    fcvtn v4.2s, v4.2d
-; CHECK-NEXT:    fcvtn2 v2.4s, v3.2d
+; CHECK-NEXT:    fcvtn v2.2s, v2.2d
+; CHECK-NEXT:    fcvtn v6.2s, v6.2d
 ; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT:    fcvtn2 v6.4s, v7.2d
 ; CHECK-NEXT:    fcvtn2 v4.4s, v5.2d
-; CHECK-NEXT:    fcvtn v2.4h, v2.4s
+; CHECK-NEXT:    fcvtn2 v2.4s, v3.2d
+; CHECK-NEXT:    fcvtn2 v6.4s, v7.2d
 ; CHECK-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-NEXT:    fcvtn v3.4h, v6.4s
 ; CHECK-NEXT:    fcvtn v1.4h, v4.4s
-; CHECK-NEXT:    mov v0.d[1], v2.d[0]
-; CHECK-NEXT:    mov v1.d[1], v3.d[0]
+; CHECK-NEXT:    fcvtn2 v0.8h, v2.4s
+; CHECK-NEXT:    fcvtn2 v1.8h, v6.4s
 ; CHECK-NEXT:    ret
 
 

diff  --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
index 92d99ead70168..d46d31e297490 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -302,9 +302,8 @@ entry:
 define <8 x half> @s_to_h(<8 x float> %a) {
 ; CHECK-LABEL: s_to_h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fcvtn v1.4h, v1.4s
 ; CHECK-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    fcvtn2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
   %1 = fptrunc <8 x float> %a to <8 x half>
   ret <8 x half> %1
@@ -417,13 +416,12 @@ define <8 x half> @sitofp_v8i8(<8 x i8> %a) #0 {
 ; CHECK-CVT-LABEL: sitofp_v8i8:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-CVT-NEXT:    sshll2 v1.4s, v0.8h, #0
-; CHECK-CVT-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-CVT-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-CVT-NEXT:    sshll2 v2.4s, v0.8h, #0
 ; CHECK-CVT-NEXT:    scvtf v1.4s, v1.4s
-; CHECK-CVT-NEXT:    scvtf v0.4s, v0.4s
-; CHECK-CVT-NEXT:    fcvtn v1.4h, v1.4s
-; CHECK-CVT-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-CVT-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-CVT-NEXT:    scvtf v1.4s, v2.4s
+; CHECK-CVT-NEXT:    fcvtn2 v0.8h, v1.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: sitofp_v8i8:
@@ -440,20 +438,18 @@ define <16 x half> @sitofp_v16i8(<16 x i8> %a) #0 {
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    sshll2 v1.8h, v0.16b, #0
 ; CHECK-CVT-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-CVT-NEXT:    sshll2 v2.4s, v1.8h, #0
-; CHECK-CVT-NEXT:    sshll v1.4s, v1.4h, #0
-; CHECK-CVT-NEXT:    sshll2 v3.4s, v0.8h, #0
-; CHECK-CVT-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-CVT-NEXT:    sshll v2.4s, v1.4h, #0
+; CHECK-CVT-NEXT:    sshll v3.4s, v0.4h, #0
 ; CHECK-CVT-NEXT:    scvtf v2.4s, v2.4s
-; CHECK-CVT-NEXT:    scvtf v1.4s, v1.4s
 ; CHECK-CVT-NEXT:    scvtf v3.4s, v3.4s
-; CHECK-CVT-NEXT:    scvtf v0.4s, v0.4s
-; CHECK-CVT-NEXT:    fcvtn v2.4h, v2.4s
-; CHECK-CVT-NEXT:    fcvtn v1.4h, v1.4s
-; CHECK-CVT-NEXT:    fcvtn v3.4h, v3.4s
-; CHECK-CVT-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    mov v1.d[1], v2.d[0]
-; CHECK-CVT-NEXT:    mov v0.d[1], v3.d[0]
+; CHECK-CVT-NEXT:    sshll2 v1.4s, v1.8h, #0
+; CHECK-CVT-NEXT:    sshll2 v4.4s, v0.8h, #0
+; CHECK-CVT-NEXT:    scvtf v5.4s, v1.4s
+; CHECK-CVT-NEXT:    fcvtn v1.4h, v2.4s
+; CHECK-CVT-NEXT:    fcvtn v0.4h, v3.4s
+; CHECK-CVT-NEXT:    scvtf v2.4s, v4.4s
+; CHECK-CVT-NEXT:    fcvtn2 v1.8h, v5.4s
+; CHECK-CVT-NEXT:    fcvtn2 v0.8h, v2.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: sitofp_v16i8:
@@ -470,13 +466,12 @@ define <16 x half> @sitofp_v16i8(<16 x i8> %a) #0 {
 define <8 x half> @sitofp_i16(<8 x i16> %a) #0 {
 ; CHECK-CVT-LABEL: sitofp_i16:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    sshll2 v1.4s, v0.8h, #0
-; CHECK-CVT-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-CVT-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-CVT-NEXT:    sshll2 v2.4s, v0.8h, #0
 ; CHECK-CVT-NEXT:    scvtf v1.4s, v1.4s
-; CHECK-CVT-NEXT:    scvtf v0.4s, v0.4s
-; CHECK-CVT-NEXT:    fcvtn v1.4h, v1.4s
-; CHECK-CVT-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-CVT-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-CVT-NEXT:    scvtf v1.4s, v2.4s
+; CHECK-CVT-NEXT:    fcvtn2 v0.8h, v1.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: sitofp_i16:
@@ -490,11 +485,10 @@ define <8 x half> @sitofp_i16(<8 x i16> %a) #0 {
 define <8 x half> @sitofp_i32(<8 x i32> %a) #0 {
 ; CHECK-LABEL: sitofp_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    scvtf v1.4s, v1.4s
 ; CHECK-NEXT:    scvtf v0.4s, v0.4s
-; CHECK-NEXT:    fcvtn v1.4h, v1.4s
+; CHECK-NEXT:    scvtf v1.4s, v1.4s
 ; CHECK-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    fcvtn2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
   %1 = sitofp <8 x i32> %a to <8 x half>
   ret <8 x half> %1
@@ -504,17 +498,16 @@ define <8 x half> @sitofp_i32(<8 x i32> %a) #0 {
 define <8 x half> @sitofp_i64(<8 x i64> %a) #0 {
 ; CHECK-LABEL: sitofp_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    scvtf v2.2d, v2.2d
 ; CHECK-NEXT:    scvtf v0.2d, v0.2d
-; CHECK-NEXT:    scvtf v3.2d, v3.2d
+; CHECK-NEXT:    scvtf v2.2d, v2.2d
 ; CHECK-NEXT:    scvtf v1.2d, v1.2d
-; CHECK-NEXT:    fcvtn v2.2s, v2.2d
+; CHECK-NEXT:    scvtf v3.2d, v3.2d
 ; CHECK-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-NEXT:    fcvtn2 v2.4s, v3.2d
+; CHECK-NEXT:    fcvtn v2.2s, v2.2d
 ; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT:    fcvtn v1.4h, v2.4s
+; CHECK-NEXT:    fcvtn2 v2.4s, v3.2d
 ; CHECK-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    fcvtn2 v0.8h, v2.4s
 ; CHECK-NEXT:    ret
   %1 = sitofp <8 x i64> %a to <8 x half>
   ret <8 x half> %1
@@ -542,13 +535,12 @@ define <8 x half> @uitofp_v8i8(<8 x i8> %a) #0 {
 ; CHECK-CVT-LABEL: uitofp_v8i8:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-CVT-NEXT:    ushll2 v1.4s, v0.8h, #0
-; CHECK-CVT-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-CVT-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-CVT-NEXT:    ushll2 v2.4s, v0.8h, #0
 ; CHECK-CVT-NEXT:    ucvtf v1.4s, v1.4s
-; CHECK-CVT-NEXT:    ucvtf v0.4s, v0.4s
-; CHECK-CVT-NEXT:    fcvtn v1.4h, v1.4s
-; CHECK-CVT-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-CVT-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-CVT-NEXT:    ucvtf v1.4s, v2.4s
+; CHECK-CVT-NEXT:    fcvtn2 v0.8h, v1.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: uitofp_v8i8:
@@ -565,20 +557,18 @@ define <16 x half> @uitofp_v16i8(<16 x i8> %a) #0 {
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    ushll2 v1.8h, v0.16b, #0
 ; CHECK-CVT-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-CVT-NEXT:    ushll2 v2.4s, v1.8h, #0
-; CHECK-CVT-NEXT:    ushll v1.4s, v1.4h, #0
-; CHECK-CVT-NEXT:    ushll2 v3.4s, v0.8h, #0
-; CHECK-CVT-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-CVT-NEXT:    ushll v2.4s, v1.4h, #0
+; CHECK-CVT-NEXT:    ushll v3.4s, v0.4h, #0
 ; CHECK-CVT-NEXT:    ucvtf v2.4s, v2.4s
-; CHECK-CVT-NEXT:    ucvtf v1.4s, v1.4s
 ; CHECK-CVT-NEXT:    ucvtf v3.4s, v3.4s
-; CHECK-CVT-NEXT:    ucvtf v0.4s, v0.4s
-; CHECK-CVT-NEXT:    fcvtn v2.4h, v2.4s
-; CHECK-CVT-NEXT:    fcvtn v1.4h, v1.4s
-; CHECK-CVT-NEXT:    fcvtn v3.4h, v3.4s
-; CHECK-CVT-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    mov v1.d[1], v2.d[0]
-; CHECK-CVT-NEXT:    mov v0.d[1], v3.d[0]
+; CHECK-CVT-NEXT:    ushll2 v1.4s, v1.8h, #0
+; CHECK-CVT-NEXT:    ushll2 v4.4s, v0.8h, #0
+; CHECK-CVT-NEXT:    ucvtf v5.4s, v1.4s
+; CHECK-CVT-NEXT:    fcvtn v1.4h, v2.4s
+; CHECK-CVT-NEXT:    fcvtn v0.4h, v3.4s
+; CHECK-CVT-NEXT:    ucvtf v2.4s, v4.4s
+; CHECK-CVT-NEXT:    fcvtn2 v1.8h, v5.4s
+; CHECK-CVT-NEXT:    fcvtn2 v0.8h, v2.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: uitofp_v16i8:
@@ -596,13 +586,12 @@ define <16 x half> @uitofp_v16i8(<16 x i8> %a) #0 {
 define <8 x half> @uitofp_i16(<8 x i16> %a) #0 {
 ; CHECK-CVT-LABEL: uitofp_i16:
 ; CHECK-CVT:       // %bb.0:
-; CHECK-CVT-NEXT:    ushll2 v1.4s, v0.8h, #0
-; CHECK-CVT-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-CVT-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-CVT-NEXT:    ushll2 v2.4s, v0.8h, #0
 ; CHECK-CVT-NEXT:    ucvtf v1.4s, v1.4s
-; CHECK-CVT-NEXT:    ucvtf v0.4s, v0.4s
-; CHECK-CVT-NEXT:    fcvtn v1.4h, v1.4s
-; CHECK-CVT-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-CVT-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-CVT-NEXT:    fcvtn v0.4h, v1.4s
+; CHECK-CVT-NEXT:    ucvtf v1.4s, v2.4s
+; CHECK-CVT-NEXT:    fcvtn2 v0.8h, v1.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: uitofp_i16:
@@ -617,11 +606,10 @@ define <8 x half> @uitofp_i16(<8 x i16> %a) #0 {
 define <8 x half> @uitofp_i32(<8 x i32> %a) #0 {
 ; CHECK-LABEL: uitofp_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ucvtf v1.4s, v1.4s
 ; CHECK-NEXT:    ucvtf v0.4s, v0.4s
-; CHECK-NEXT:    fcvtn v1.4h, v1.4s
+; CHECK-NEXT:    ucvtf v1.4s, v1.4s
 ; CHECK-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    fcvtn2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
   %1 = uitofp <8 x i32> %a to <8 x half>
   ret <8 x half> %1
@@ -631,17 +619,16 @@ define <8 x half> @uitofp_i32(<8 x i32> %a) #0 {
 define <8 x half> @uitofp_i64(<8 x i64> %a) #0 {
 ; CHECK-LABEL: uitofp_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ucvtf v2.2d, v2.2d
 ; CHECK-NEXT:    ucvtf v0.2d, v0.2d
-; CHECK-NEXT:    ucvtf v3.2d, v3.2d
+; CHECK-NEXT:    ucvtf v2.2d, v2.2d
 ; CHECK-NEXT:    ucvtf v1.2d, v1.2d
-; CHECK-NEXT:    fcvtn v2.2s, v2.2d
+; CHECK-NEXT:    ucvtf v3.2d, v3.2d
 ; CHECK-NEXT:    fcvtn v0.2s, v0.2d
-; CHECK-NEXT:    fcvtn2 v2.4s, v3.2d
+; CHECK-NEXT:    fcvtn v2.2s, v2.2d
 ; CHECK-NEXT:    fcvtn2 v0.4s, v1.2d
-; CHECK-NEXT:    fcvtn v1.4h, v2.4s
+; CHECK-NEXT:    fcvtn2 v2.4s, v3.2d
 ; CHECK-NEXT:    fcvtn v0.4h, v0.4s
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    fcvtn2 v0.8h, v2.4s
 ; CHECK-NEXT:    ret
   %1 = uitofp <8 x i64> %a to <8 x half>
   ret <8 x half> %1

diff  --git a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll
index 754d9e8eb7eda..b69b41da9ee72 100644
--- a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll
@@ -462,9 +462,8 @@ define <8 x half> @test_copysign_v8f16_v8f32(<8 x half> %a, <8 x float> %b) #0 {
 ;
 ; FP16-LABEL: test_copysign_v8f16_v8f32:
 ; FP16:       ; %bb.0:
-; FP16-NEXT:    fcvtn v2.4h, v2.4s
 ; FP16-NEXT:    fcvtn v1.4h, v1.4s
-; FP16-NEXT:    mov.d v1[1], v2[0]
+; FP16-NEXT:    fcvtn2 v1.8h, v2.4s
 ; FP16-NEXT:    mvni.8h v2, #128, lsl #8
 ; FP16-NEXT:    bif.16b v0, v1, v2
 ; FP16-NEXT:    ret


        


More information about the llvm-commits mailing list