[llvm] 50378a1 - [AArch64] Extra tablegen patterns for smaller extracted addl/addw/subl/subw

David Green via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 13 03:44:22 PDT 2023


Author: David Green
Date: 2023-07-13T11:44:17+01:00
New Revision: 50378a16d41209545022975fb253a0ce2d9597cc

URL: https://github.com/llvm/llvm-project/commit/50378a16d41209545022975fb253a0ce2d9597cc
DIFF: https://github.com/llvm/llvm-project/commit/50378a16d41209545022975fb253a0ce2d9597cc.diff

LOG: [AArch64] Extra tablegen patterns for smaller extracted addl/addw/subl/subw

During lowering, especially of smaller vector types, we can end up with
`add(extract_subvector(zext(x)), extract_subvector(zext(y)))`, which can
be turned into `extract_subvector(add(zext(x), zext(y)))`, which can use
the addl AArch64 instruction. This adds some tablegen patterns for it,
along with addw where only one operand is an extract/extend and subl/subw.

Differential Revision: https://reviews.llvm.org/D153632

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
    llvm/test/CodeGen/AArch64/arm64-ld1.ll
    llvm/test/CodeGen/AArch64/extbinopload.ll
    llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
    llvm/test/CodeGen/AArch64/neon-extadd.ll
    llvm/test/CodeGen/AArch64/uadd_sat_vec.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 7199b80826d3a2..a63a36de050ae9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5654,6 +5654,34 @@ defm : Neon_mul_acc_widen_patterns<sub, AArch64umull,
 defm : Neon_mul_acc_widen_patterns<sub, AArch64smull,
      SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
 
+
+multiclass Neon_addl_extract_patterns<SDPatternOperator opnode, SDPatternOperator ext, string Inst> {
+  def : Pat<(v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)),
+                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
+            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Lv8i8_v8i16") V64:$Rn, V64:$Rm)), dsub)>;
+  def : Pat<(v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)),
+                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
+            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Lv4i16_v4i32") V64:$Rn, V64:$Rm)), dsub)>;
+  def : Pat<(v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)),
+                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
+            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Lv2i32_v2i64") V64:$Rn, V64:$Rm)), dsub)>;
+
+  def : Pat<(v4i16 (opnode (v4i16 V64:$Rn),
+                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
+            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Wv8i8_v8i16") (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
+  def : Pat<(v2i32 (opnode (v2i32 V64:$Rn),
+                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
+            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Wv4i16_v4i32") (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
+  def : Pat<(v1i64 (opnode (v1i64 V64:$Rn),
+                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
+            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Wv2i32_v2i64") (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
+}
+
+defm : Neon_addl_extract_patterns<add, zanyext, "UADD">;
+defm : Neon_addl_extract_patterns<add, sext, "SADD">;
+defm : Neon_addl_extract_patterns<sub, zanyext, "USUB">;
+defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;
+
 // CodeGen patterns for addhn and subhn instructions, which can actually be
 // written in LLVM IR without too much difficulty.
 

diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
index 804c1e7cfc3635..b75783339eda30 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
@@ -381,9 +381,7 @@ define <4 x i16> @anyext_v4i16(ptr %a, ptr %b) {
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    ldr s0, [x0]
 ; CHECK-LE-NEXT:    ldr s1, [x1]
-; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-LE-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-LE-NEXT:    add v0.4h, v0.4h, v1.4h
+; CHECK-LE-NEXT:    uaddl v0.8h, v0.8b, v1.8b
 ; CHECK-LE-NEXT:    shl v0.4h, v0.4h, #8
 ; CHECK-LE-NEXT:    sshr v0.4h, v0.4h, #8
 ; CHECK-LE-NEXT:    ret
@@ -394,9 +392,7 @@ define <4 x i16> @anyext_v4i16(ptr %a, ptr %b) {
 ; CHECK-BE-NEXT:    ldr s1, [x1]
 ; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
 ; CHECK-BE-NEXT:    rev32 v1.8b, v1.8b
-; CHECK-BE-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-BE-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-BE-NEXT:    add v0.4h, v0.4h, v1.4h
+; CHECK-BE-NEXT:    uaddl v0.8h, v0.8b, v1.8b
 ; CHECK-BE-NEXT:    shl v0.4h, v0.4h, #8
 ; CHECK-BE-NEXT:    sshr v0.4h, v0.4h, #8
 ; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
@@ -413,9 +409,7 @@ define <4 x i32> @anyext_v4i32(ptr %a, ptr %b) {
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    ldr s0, [x0]
 ; CHECK-LE-NEXT:    ldr s1, [x1]
-; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-LE-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-LE-NEXT:    add v0.4h, v0.4h, v1.4h
+; CHECK-LE-NEXT:    uaddl v0.8h, v0.8b, v1.8b
 ; CHECK-LE-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-LE-NEXT:    shl v0.4s, v0.4s, #24
 ; CHECK-LE-NEXT:    sshr v0.4s, v0.4s, #24
@@ -427,9 +421,7 @@ define <4 x i32> @anyext_v4i32(ptr %a, ptr %b) {
 ; CHECK-BE-NEXT:    ldr s1, [x1]
 ; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
 ; CHECK-BE-NEXT:    rev32 v1.8b, v1.8b
-; CHECK-BE-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-BE-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-BE-NEXT:    add v0.4h, v0.4h, v1.4h
+; CHECK-BE-NEXT:    uaddl v0.8h, v0.8b, v1.8b
 ; CHECK-BE-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-BE-NEXT:    shl v0.4s, v0.4s, #24
 ; CHECK-BE-NEXT:    sshr v0.4s, v0.4s, #24

diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
index 487a2fc155c4db..292617156597d4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
@@ -913,11 +913,9 @@ define <2 x float> @ld1_2s_float(<2 x float> %V, ptr %bar) {
 define void @ld1r_2s_from_dup(ptr nocapture %a, ptr nocapture %b, ptr nocapture %diff) nounwind ssp {
 entry:
 ; CHECK: ld1r_2s_from_dup
-; CHECK: ld1r.2s { [[ARG1:v[0-9]+]] }, [x0]
-; CHECK-NEXT: ld1r.2s { [[ARG2:v[0-9]+]] }, [x1]
-; CHECK-NEXT: ushll.8h [[ARG1]], [[ARG1]], #0
-; CHECK-NEXT: ushll.8h [[ARG2]], [[ARG2]], #0
-; CHECK-NEXT: sub.4h v[[RESREGNUM:[0-9]+]], [[ARG1]], [[ARG2]]
+; CHECK: ld1r.2s { [[ARG2:v[0-9]+]] }, [x1]
+; CHECK-NEXT: ld1r.2s { [[ARG1:v[0-9]+]] }, [x0]
+; CHECK-NEXT: usubl.8h v[[RESREGNUM:[0-9]+]], [[ARG1]], [[ARG2]]
 ; CHECK-NEXT: str d[[RESREGNUM]], [x2]
 ; CHECK-NEXT: ret
   %tmp1 = load i32, ptr %a, align 4

diff --git a/llvm/test/CodeGen/AArch64/extbinopload.ll b/llvm/test/CodeGen/AArch64/extbinopload.ll
index aab41a54457640..94cf75391254fe 100644
--- a/llvm/test/CodeGen/AArch64/extbinopload.ll
+++ b/llvm/test/CodeGen/AArch64/extbinopload.ll
@@ -5,9 +5,8 @@ define <4 x i16> @normal_load_v4i8(ptr %p) {
 ; CHECK-LABEL: normal_load_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp s0, s1, [x0]
-; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    uaddl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l1 = load <4 x i8>, ptr %p
   %q = getelementptr i8, ptr %p, i32 4
@@ -38,9 +37,9 @@ define <4 x i16> @load_v4i8(ptr %p) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp s1, s0, [x0]
 ; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
 ; CHECK-NEXT:    shl v0.4h, v0.4h, #3
-; CHECK-NEXT:    add v0.4h, v1.4h, v0.4h
+; CHECK-NEXT:    uaddw v0.8h, v0.8h, v1.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l1 = load <4 x i8>, ptr %p
   %q = getelementptr i8, ptr %p, i32 4

diff --git a/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll b/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
index 4fff9862b49501..f09de0c5b9e1aa 100644
--- a/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
+++ b/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
@@ -4,9 +4,8 @@
 define <4 x i16> @addls_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-LABEL: addls_v8i8_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
-; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    saddl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s0s = sext <8 x i8> %s0 to <8 x i16>
@@ -20,8 +19,9 @@ entry:
 define <4 x i16> @addws_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) {
 ; CHECK-LABEL: addws_v8i8_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
-; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    saddw v0.8h, v0.8h, v1.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s1s = sext <8 x i8> %s1 to <8 x i16>
@@ -33,9 +33,8 @@ entry:
 define <4 x i16> @addlu_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-LABEL: addlu_v8i8_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    uaddl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s0s = zext <8 x i8> %s0 to <8 x i16>
@@ -49,8 +48,9 @@ entry:
 define <4 x i16> @addwu_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) {
 ; CHECK-LABEL: addwu_v8i8_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    uaddw v0.8h, v0.8h, v1.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s1s = zext <8 x i8> %s1 to <8 x i16>
@@ -62,9 +62,8 @@ entry:
 define <4 x i16> @subls_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-LABEL: subls_v8i8_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
-; CHECK-NEXT:    sub v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ssubl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s0s = sext <8 x i8> %s0 to <8 x i16>
@@ -78,8 +77,9 @@ entry:
 define <4 x i16> @subws_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) {
 ; CHECK-LABEL: subws_v8i8_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
-; CHECK-NEXT:    sub v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ssubw v0.8h, v0.8h, v1.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s1s = sext <8 x i8> %s1 to <8 x i16>
@@ -91,9 +91,8 @@ entry:
 define <4 x i16> @sublu_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-LABEL: sublu_v8i8_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-NEXT:    sub v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    usubl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s0s = zext <8 x i8> %s0 to <8 x i16>
@@ -107,8 +106,9 @@ entry:
 define <4 x i16> @subwu_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) {
 ; CHECK-LABEL: subwu_v8i8_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-NEXT:    sub v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    usubw v0.8h, v0.8h, v1.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s1s = zext <8 x i8> %s1 to <8 x i16>
@@ -178,9 +178,8 @@ entry:
 define <2 x i32> @addls_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-LABEL: addls_v4i16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
-; CHECK-NEXT:    add v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    saddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s0s = sext <4 x i16> %s0 to <4 x i32>
@@ -194,8 +193,9 @@ entry:
 define <2 x i32> @addws_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) {
 ; CHECK-LABEL: addws_v4i16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
-; CHECK-NEXT:    add v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    saddw v0.4s, v0.4s, v1.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s1s = sext <4 x i16> %s1 to <4 x i32>
@@ -207,9 +207,8 @@ entry:
 define <2 x i32> @addlu_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-LABEL: addlu_v4i16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
-; CHECK-NEXT:    add v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    uaddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s0s = zext <4 x i16> %s0 to <4 x i32>
@@ -223,8 +222,9 @@ entry:
 define <2 x i32> @addwu_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) {
 ; CHECK-LABEL: addwu_v4i16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
-; CHECK-NEXT:    add v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    uaddw v0.4s, v0.4s, v1.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s1s = zext <4 x i16> %s1 to <4 x i32>
@@ -236,9 +236,8 @@ entry:
 define <2 x i32> @subls_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-LABEL: subls_v4i16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
-; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
-; CHECK-NEXT:    sub v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ssubl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s0s = sext <4 x i16> %s0 to <4 x i32>
@@ -252,8 +251,9 @@ entry:
 define <2 x i32> @subws_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) {
 ; CHECK-LABEL: subws_v4i16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
-; CHECK-NEXT:    sub v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ssubw v0.4s, v0.4s, v1.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s1s = sext <4 x i16> %s1 to <4 x i32>
@@ -265,9 +265,8 @@ entry:
 define <2 x i32> @sublu_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-LABEL: sublu_v4i16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
-; CHECK-NEXT:    sub v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    usubl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s0s = zext <4 x i16> %s0 to <4 x i32>
@@ -281,8 +280,9 @@ entry:
 define <2 x i32> @subwu_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) {
 ; CHECK-LABEL: subwu_v4i16_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
-; CHECK-NEXT:    sub v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    usubw v0.4s, v0.4s, v1.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s1s = zext <4 x i16> %s1 to <4 x i32>
@@ -352,9 +352,8 @@ entry:
 define <1 x i64> @addls_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-LABEL: addls_v2i32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-NEXT:    sshll v1.2d, v1.2s, #0
-; CHECK-NEXT:    add d0, d0, d1
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s0s = sext <2 x i32> %s0 to <2 x i64>
@@ -368,8 +367,9 @@ entry:
 define <1 x i64> @addws_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
 ; CHECK-LABEL: addws_v2i32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshll v1.2d, v1.2s, #0
-; CHECK-NEXT:    add d0, d0, d1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    saddw v0.2d, v0.2d, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s1s = sext <2 x i32> %s1 to <2 x i64>
@@ -381,9 +381,8 @@ entry:
 define <1 x i64> @addlu_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-LABEL: addlu_v2i32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-NEXT:    add d0, d0, d1
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s0s = zext <2 x i32> %s0 to <2 x i64>
@@ -397,8 +396,9 @@ entry:
 define <1 x i64> @addwu_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
 ; CHECK-LABEL: addwu_v2i32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-NEXT:    add d0, d0, d1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    uaddw v0.2d, v0.2d, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s1s = zext <2 x i32> %s1 to <2 x i64>
@@ -410,9 +410,8 @@ entry:
 define <1 x i64> @subls_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-LABEL: subls_v2i32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
-; CHECK-NEXT:    sshll v1.2d, v1.2s, #0
-; CHECK-NEXT:    sub d0, d0, d1
+; CHECK-NEXT:    ssubl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s0s = sext <2 x i32> %s0 to <2 x i64>
@@ -426,8 +425,9 @@ entry:
 define <1 x i64> @subws_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
 ; CHECK-LABEL: subws_v2i32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshll v1.2d, v1.2s, #0
-; CHECK-NEXT:    sub d0, d0, d1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ssubw v0.2d, v0.2d, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s1s = sext <2 x i32> %s1 to <2 x i64>
@@ -439,9 +439,8 @@ entry:
 define <1 x i64> @sublu_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-LABEL: sublu_v2i32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
-; CHECK-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-NEXT:    sub d0, d0, d1
+; CHECK-NEXT:    usubl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s0s = zext <2 x i32> %s0 to <2 x i64>
@@ -455,8 +454,9 @@ entry:
 define <1 x i64> @subwu_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
 ; CHECK-LABEL: subwu_v2i32_0:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-NEXT:    sub d0, d0, d1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    usubw v0.2d, v0.2d, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %s1s = zext <2 x i32> %s1 to <2 x i64>

diff --git a/llvm/test/CodeGen/AArch64/neon-extadd.ll b/llvm/test/CodeGen/AArch64/neon-extadd.ll
index 20c6069f7d1885..76fab7ff733bc8 100644
--- a/llvm/test/CodeGen/AArch64/neon-extadd.ll
+++ b/llvm/test/CodeGen/AArch64/neon-extadd.ll
@@ -503,17 +503,15 @@ define <20 x i32> @v20(<20 x i8> %s0, <20 x i8> %s1) {
 ; CHECK-NEXT:    ld1 { v3.b }[6], [x9]
 ; CHECK-NEXT:    add x9, sp, #88
 ; CHECK-NEXT:    mov v1.b[6], w6
-; CHECK-NEXT:    ld1 { v4.b }[3], [x12]
 ; CHECK-NEXT:    ld1 { v2.b }[7], [x11]
+; CHECK-NEXT:    ld1 { v4.b }[3], [x12]
 ; CHECK-NEXT:    ld1 { v5.b }[3], [x9]
 ; CHECK-NEXT:    ld1 { v3.b }[7], [x10]
 ; CHECK-NEXT:    mov v1.b[7], w7
-; CHECK-NEXT:    ushll v4.8h, v4.8b, #0
-; CHECK-NEXT:    ushll v5.8h, v5.8b, #0
+; CHECK-NEXT:    uaddl v4.8h, v5.8b, v4.8b
 ; CHECK-NEXT:    uaddl v2.8h, v3.8b, v2.8b
-; CHECK-NEXT:    add v3.4h, v5.4h, v4.4h
 ; CHECK-NEXT:    uaddl v0.8h, v1.8b, v0.8b
-; CHECK-NEXT:    ushll v1.4s, v3.4h, #0
+; CHECK-NEXT:    ushll v1.4s, v4.4h, #0
 ; CHECK-NEXT:    ushll2 v3.4s, v2.8h, #0
 ; CHECK-NEXT:    ushll v2.4s, v2.4h, #0
 ; CHECK-NEXT:    stp q3, q1, [x8, #48]

diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
index ef7fde7f93888f..1d0592bab6f65b 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -112,13 +112,11 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind {
 define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ; CHECK-LABEL: v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr s1, [x0]
-; CHECK-NEXT:    movi d0, #0xff00ff00ff00ff
-; CHECK-NEXT:    ldr s2, [x1]
-; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-NEXT:    ushll v2.8h, v2.8b, #0
-; CHECK-NEXT:    add v1.4h, v1.4h, v2.4h
-; CHECK-NEXT:    umin v0.4h, v1.4h, v0.4h
+; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    movi d2, #0xff00ff00ff00ff
+; CHECK-NEXT:    ldr s1, [x1]
+; CHECK-NEXT:    uaddl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT:    umin v0.4h, v0.4h, v2.4h
 ; CHECK-NEXT:    xtn v0.8b, v0.8h
 ; CHECK-NEXT:    str s0, [x2]
 ; CHECK-NEXT:    ret


        


More information about the llvm-commits mailing list