[llvm] 2f792f6 - [AArch64][GlobalISel] Add some post-legalization cast combines. (#112509)

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 18 01:57:29 PDT 2024


Author: David Green
Date: 2024-10-18T09:57:25+01:00
New Revision: 2f792f6e7157751441b06c7212edfea1a0651a27

URL: https://github.com/llvm/llvm-project/commit/2f792f6e7157751441b06c7212edfea1a0651a27
DIFF: https://github.com/llvm/llvm-project/commit/2f792f6e7157751441b06c7212edfea1a0651a27.diff

LOG: [AArch64][GlobalISel] Add some post-legalization cast combines. (#112509)

This helps clear up some of the legalization artefacts. Not all of the
cast_combines are added (the select combines, notably, are left out), as the
omitted ones currently have questionable benefit in the test updates.

Added: 
    

Modified: 
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/lib/Target/AArch64/AArch64Combine.td
    llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
    llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
    llvm/test/CodeGen/AArch64/add.ll
    llvm/test/CodeGen/AArch64/and-mask-removal.ll
    llvm/test/CodeGen/AArch64/andorxor.ll
    llvm/test/CodeGen/AArch64/bitcast.ll
    llvm/test/CodeGen/AArch64/concat-vector.ll
    llvm/test/CodeGen/AArch64/fcmp.ll
    llvm/test/CodeGen/AArch64/itofp.ll
    llvm/test/CodeGen/AArch64/mul.ll
    llvm/test/CodeGen/AArch64/sub.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 77cb4370b54664..d0373a7dadfcf9 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1791,20 +1791,24 @@ class integer_of_opcode<Instruction castOpcode> : GICombineRule <
 
 def integer_of_truncate : integer_of_opcode<G_TRUNC>;
 
-def cast_combines: GICombineGroup<[
+def cast_of_cast_combines: GICombineGroup<[
   truncate_of_zext,
   truncate_of_sext,
   truncate_of_anyext,
-  select_of_zext,
-  select_of_anyext,
-  select_of_truncate,
   zext_of_zext,
   zext_of_anyext,
   sext_of_sext,
   sext_of_anyext,
   anyext_of_anyext,
   anyext_of_zext,
-  anyext_of_sext,
+  anyext_of_sext
+]>;
+
+def cast_combines: GICombineGroup<[
+  cast_of_cast_combines,
+  select_of_zext,
+  select_of_anyext,
+  select_of_truncate,
   buildvector_of_truncate,
   narrow_binop_add,
   narrow_binop_sub,

diff  --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index ead6455ddd5278..321190c83b79f3 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -314,9 +314,9 @@ def AArch64PostLegalizerLowering
 // Post-legalization combines which are primarily optimizations.
 def AArch64PostLegalizerCombiner
     : GICombiner<"AArch64PostLegalizerCombinerImpl",
-                       [copy_prop, combines_for_extload,
-                        combine_indexed_load_store,
-                        sext_trunc_sextload, mutate_anyext_to_zext,
+                       [copy_prop, cast_of_cast_combines, buildvector_of_truncate,
+                        integer_of_truncate, mutate_anyext_to_zext,
+                        combines_for_extload, combine_indexed_load_store, sext_trunc_sextload,
                         hoist_logic_op_with_same_opcode_hands,
                         redundant_and, xor_of_and_with_same_reg,
                         extractvecelt_pairwise_add, redundant_or,

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
index ae04cc77dcaf13..b045deebc56e03 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
@@ -135,20 +135,13 @@ name:            test_combine_trunc_build_vector
 legalized: true
 body:             |
   bb.1:
-    ; CHECK-PRE-LABEL: name: test_combine_trunc_build_vector
-    ; CHECK-PRE: %arg1:_(s64) = COPY $x0
-    ; CHECK-PRE-NEXT: %arg2:_(s64) = COPY $x0
-    ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64)
-    ; CHECK-PRE-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %arg2(s64)
-    ; CHECK-PRE-NEXT: %small:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
-    ; CHECK-PRE-NEXT: $x0 = COPY %small(<2 x s32>)
-    ;
-    ; CHECK-POST-LABEL: name: test_combine_trunc_build_vector
-    ; CHECK-POST: %arg1:_(s64) = COPY $x0
-    ; CHECK-POST-NEXT: %arg2:_(s64) = COPY $x0
-    ; CHECK-POST-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
-    ; CHECK-POST-NEXT: %small:_(<2 x s32>) = G_TRUNC %bv(<2 x s64>)
-    ; CHECK-POST-NEXT: $x0 = COPY %small(<2 x s32>)
+    ; CHECK-LABEL: name: test_combine_trunc_build_vector
+    ; CHECK: %arg1:_(s64) = COPY $x0
+    ; CHECK-NEXT: %arg2:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %arg1(s64)
+    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %arg2(s64)
+    ; CHECK-NEXT: %small:_(<2 x s32>) = G_BUILD_VECTOR [[TRUNC]](s32), [[TRUNC1]](s32)
+    ; CHECK-NEXT: $x0 = COPY %small(<2 x s32>)
     %arg1:_(s64) = COPY $x0
     %arg2:_(s64) = COPY $x0
     %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
index 4a38b5d4c63dd9..9a2b9dd4b2b608 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir
@@ -32,20 +32,12 @@ legalized: true
 body:             |
   bb.1:
   liveins: $h0
-    ; CHECK-PRE-LABEL: name: test_combine_trunc_anyext_s32_s16
-    ; CHECK-PRE: liveins: $h0
-    ; CHECK-PRE-NEXT: {{  $}}
-    ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
-    ; CHECK-PRE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16)
-    ; CHECK-PRE-NEXT: $w0 = COPY [[ANYEXT]](s32)
-    ;
-    ; CHECK-POST-LABEL: name: test_combine_trunc_anyext_s32_s16
-    ; CHECK-POST: liveins: $h0
-    ; CHECK-POST-NEXT: {{  $}}
-    ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
-    ; CHECK-POST-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s16)
-    ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ANYEXT]](s64)
-    ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32)
+    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s16
+    ; CHECK: liveins: $h0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16)
+    ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
     %0:_(s16) = COPY $h0
     %1:_(s64) = G_ANYEXT %0(s16)
     %2:_(s32) = G_TRUNC %1(s64)
@@ -82,20 +74,12 @@ legalized: true
 body:             |
   bb.1:
   liveins: $h0
-    ; CHECK-PRE-LABEL: name: test_combine_trunc_sext_s32_s16
-    ; CHECK-PRE: liveins: $h0
-    ; CHECK-PRE-NEXT: {{  $}}
-    ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
-    ; CHECK-PRE-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16)
-    ; CHECK-PRE-NEXT: $w0 = COPY [[SEXT]](s32)
-    ;
-    ; CHECK-POST-LABEL: name: test_combine_trunc_sext_s32_s16
-    ; CHECK-POST: liveins: $h0
-    ; CHECK-POST-NEXT: {{  $}}
-    ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
-    ; CHECK-POST-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s16)
-    ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[SEXT]](s64)
-    ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32)
+    ; CHECK-LABEL: name: test_combine_trunc_sext_s32_s16
+    ; CHECK: liveins: $h0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[COPY]](s16)
+    ; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
     %0:_(s16) = COPY $h0
     %1:_(s64) = G_SEXT %0(s16)
     %2:_(s32) = G_TRUNC %1(s64)
@@ -107,20 +91,12 @@ legalized: true
 body:             |
   bb.1:
   liveins: $h0
-    ; CHECK-PRE-LABEL: name: test_combine_trunc_zext_s32_s16
-    ; CHECK-PRE: liveins: $h0
-    ; CHECK-PRE-NEXT: {{  $}}
-    ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
-    ; CHECK-PRE-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16)
-    ; CHECK-PRE-NEXT: $w0 = COPY [[ZEXT]](s32)
-    ;
-    ; CHECK-POST-LABEL: name: test_combine_trunc_zext_s32_s16
-    ; CHECK-POST: liveins: $h0
-    ; CHECK-POST-NEXT: {{  $}}
-    ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
-    ; CHECK-POST-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s16)
-    ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ZEXT]](s64)
-    ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32)
+    ; CHECK-LABEL: name: test_combine_trunc_zext_s32_s16
+    ; CHECK: liveins: $h0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY]](s16)
+    ; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32)
     %0:_(s16) = COPY $h0
     %1:_(s64) = G_ZEXT %0(s16)
     %2:_(s32) = G_TRUNC %1(s64)
@@ -132,19 +108,11 @@ legalized: true
 body:             |
   bb.1:
   liveins: $w0
-    ; CHECK-PRE-LABEL: name: test_combine_trunc_anyext_s32_s32
-    ; CHECK-PRE: liveins: $w0
-    ; CHECK-PRE-NEXT: {{  $}}
-    ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
-    ; CHECK-PRE-NEXT: $w0 = COPY [[COPY]](s32)
-    ;
-    ; CHECK-POST-LABEL: name: test_combine_trunc_anyext_s32_s32
-    ; CHECK-POST: liveins: $w0
-    ; CHECK-POST-NEXT: {{  $}}
-    ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
-    ; CHECK-POST-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
-    ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ANYEXT]](s64)
-    ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32)
+    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s32
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
     %0:_(s32) = COPY $w0
     %1:_(s64) = G_ANYEXT %0(s32)
     %2:_(s32) = G_TRUNC %1(s64)
@@ -156,20 +124,12 @@ legalized: true
 body:             |
   bb.1:
   liveins: $x0
-    ; CHECK-PRE-LABEL: name: test_combine_trunc_anyext_s32_s64
-    ; CHECK-PRE: liveins: $x0
-    ; CHECK-PRE-NEXT: {{  $}}
-    ; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
-    ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
-    ; CHECK-PRE-NEXT: $w0 = COPY [[TRUNC]](s32)
-    ;
-    ; CHECK-POST-LABEL: name: test_combine_trunc_anyext_s32_s64
-    ; CHECK-POST: liveins: $x0
-    ; CHECK-POST-NEXT: {{  $}}
-    ; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
-    ; CHECK-POST-NEXT: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[COPY]](s64)
-    ; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ANYEXT]](s128)
-    ; CHECK-POST-NEXT: $w0 = COPY [[TRUNC]](s32)
+    ; CHECK-LABEL: name: test_combine_trunc_anyext_s32_s64
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
     %0:_(s64) = COPY $x0
     %1:_(s128) = G_ANYEXT %0(s64)
     %2:_(s32) = G_TRUNC %1(s128)

diff  --git a/llvm/test/CodeGen/AArch64/add.ll b/llvm/test/CodeGen/AArch64/add.ll
index fc1a0c71d4cdf0..ce7e3101a7a541 100644
--- a/llvm/test/CodeGen/AArch64/add.ll
+++ b/llvm/test/CodeGen/AArch64/add.ll
@@ -171,11 +171,7 @@ define void @v4i8(ptr %p1, ptr %p2) {
 ; CHECK-GI-NEXT:    ushll v0.8h, v3.8b, #0
 ; CHECK-GI-NEXT:    ushll v1.8h, v5.8b, #0
 ; CHECK-GI-NEXT:    add v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
-; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v1.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    str w8, [x0]
 ; CHECK-GI-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
index f005ca47ad124f..09f00b3845f25f 100644
--- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll
+++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
@@ -530,10 +530,10 @@ define i64 @test_2_selects(i8 zeroext %a) {
 ; CHECK-LABEL: test_2_selects:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    add w9, w0, #24
-; CHECK-NEXT:    mov w8, #131
+; CHECK-NEXT:    mov w8, #131 ; =0x83
 ; CHECK-NEXT:    and w9, w9, #0xff
 ; CHECK-NEXT:    cmp w9, #81
-; CHECK-NEXT:    mov w9, #57
+; CHECK-NEXT:    mov w9, #57 ; =0x39
 ; CHECK-NEXT:    csel x8, x8, xzr, lo
 ; CHECK-NEXT:    csel x9, xzr, x9, eq
 ; CHECK-NEXT:    add x0, x8, x9

diff  --git a/llvm/test/CodeGen/AArch64/andorxor.ll b/llvm/test/CodeGen/AArch64/andorxor.ll
index 5385a917619fa0..459daece90deed 100644
--- a/llvm/test/CodeGen/AArch64/andorxor.ll
+++ b/llvm/test/CodeGen/AArch64/andorxor.ll
@@ -463,11 +463,7 @@ define void @and_v4i8(ptr %p1, ptr %p2) {
 ; CHECK-GI-NEXT:    ushll v0.8h, v3.8b, #0
 ; CHECK-GI-NEXT:    ushll v1.8h, v5.8b, #0
 ; CHECK-GI-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
-; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v1.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    str w8, [x0]
 ; CHECK-GI-NEXT:    ret
@@ -514,11 +510,7 @@ define void @or_v4i8(ptr %p1, ptr %p2) {
 ; CHECK-GI-NEXT:    ushll v0.8h, v3.8b, #0
 ; CHECK-GI-NEXT:    ushll v1.8h, v5.8b, #0
 ; CHECK-GI-NEXT:    orr v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
-; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v1.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    str w8, [x0]
 ; CHECK-GI-NEXT:    ret
@@ -565,11 +557,7 @@ define void @xor_v4i8(ptr %p1, ptr %p2) {
 ; CHECK-GI-NEXT:    ushll v0.8h, v3.8b, #0
 ; CHECK-GI-NEXT:    ushll v1.8h, v5.8b, #0
 ; CHECK-GI-NEXT:    eor v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
-; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v1.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    str w8, [x0]
 ; CHECK-GI-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll
index 79cfeedb74bce0..bbdf8b0a13d358 100644
--- a/llvm/test/CodeGen/AArch64/bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast.ll
@@ -60,11 +60,7 @@ define i32 @bitcast_v4i8_i32(<4 x i8> %a, <4 x i8> %b){
 ; CHECK-GI-LABEL: bitcast_v4i8_i32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    add v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
-; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v1.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    ret
   %c = add <4 x i8> %a, %b
@@ -116,9 +112,7 @@ define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){
 ; CHECK-GI-LABEL: bitcast_v2i16_i32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    add v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-GI-NEXT:    xtn v0.4h, v1.4s
+; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    ret
   %c = add <2 x i16> %a, %b
@@ -418,9 +412,7 @@ define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){
 ; CHECK-GI-LABEL: bitcast_v2i16_v4i8:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    add v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-GI-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-GI-NEXT:    xtn v0.4h, v1.4s
+; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
 ; CHECK-GI-NEXT:    mov b1, v0.b[1]
 ; CHECK-GI-NEXT:    mov v2.b[0], v0.b[0]
 ; CHECK-GI-NEXT:    mov b3, v0.b[2]
@@ -455,11 +447,7 @@ define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){
 ; CHECK-GI-LABEL: bitcast_v4i8_v2i16:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    add v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
-; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v1.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0

diff  --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll
index d800b2549cf223..0033999b9bd51d 100644
--- a/llvm/test/CodeGen/AArch64/concat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/concat-vector.ll
@@ -33,18 +33,8 @@ define <8 x i8> @concat2(<4 x i8> %A, <4 x i8> %B) {
 ;
 ; CHECK-GI-LABEL: concat2:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov v2.h[0], v0.h[0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    mov v3.h[0], v1.h[0]
-; CHECK-GI-NEXT:    mov v2.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v3.h[1], v1.h[1]
-; CHECK-GI-NEXT:    mov v2.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v3.h[2], v1.h[2]
-; CHECK-GI-NEXT:    mov v2.h[3], v0.h[3]
-; CHECK-GI-NEXT:    mov v3.h[3], v1.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v2.8h
-; CHECK-GI-NEXT:    xtn v1.8b, v3.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT:    uzp1 v1.8b, v1.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
@@ -74,15 +64,9 @@ define <4 x i16> @concat4(<2 x i16> %A, <2 x i16> %B) {
 ;
 ; CHECK-GI-LABEL: concat4:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov v2.s[0], v0.s[0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    mov v2.s[1], v0.s[1]
-; CHECK-GI-NEXT:    mov v0.s[0], v1.s[0]
-; CHECK-GI-NEXT:    xtn v2.4h, v2.4s
-; CHECK-GI-NEXT:    mov v0.s[1], v1.s[1]
-; CHECK-GI-NEXT:    xtn v1.4h, v0.4s
-; CHECK-GI-NEXT:    fmov w8, s2
+; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT:    uzp1 v1.4h, v1.4h, v0.4h
+; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
@@ -183,12 +167,11 @@ define <8 x i16> @concat_v8s16_v2s16(ptr %ptr) {
 ;
 ; CHECK-GI-LABEL: concat_v8s16_v2s16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    ldr h0, [x0]
-; CHECK-GI-NEXT:    ldr h1, [x0, #2]
-; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-GI-NEXT:    xtn v0.4h, v0.4s
-; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    ldrh w8, [x0]
+; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    mov v1.h[1], w9
+; CHECK-GI-NEXT:    mov v0.s[0], v1.s[0]
 ; CHECK-GI-NEXT:    ret
     %a = load <2 x i16>, ptr %ptr
     %b = shufflevector <2 x i16> %a, <2 x i16> %a, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -238,34 +221,14 @@ define <16 x i8> @concat_v16s8_v4s8_reg(<4 x i8> %A, <4 x i8> %B, <4 x i8> %C, <
 ;
 ; CHECK-GI-LABEL: concat_v16s8_v4s8_reg:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov v4.h[0], v0.h[0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    mov v5.h[0], v1.h[0]
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    mov v6.h[0], v2.h[0]
-; CHECK-GI-NEXT:    mov v7.h[0], v3.h[0]
-; CHECK-GI-NEXT:    mov v4.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v5.h[1], v1.h[1]
-; CHECK-GI-NEXT:    mov v6.h[1], v2.h[1]
-; CHECK-GI-NEXT:    mov v7.h[1], v3.h[1]
-; CHECK-GI-NEXT:    mov v4.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v5.h[2], v1.h[2]
-; CHECK-GI-NEXT:    mov v6.h[2], v2.h[2]
-; CHECK-GI-NEXT:    mov v7.h[2], v3.h[2]
-; CHECK-GI-NEXT:    mov v4.h[3], v0.h[3]
-; CHECK-GI-NEXT:    mov v5.h[3], v1.h[3]
-; CHECK-GI-NEXT:    mov v6.h[3], v2.h[3]
-; CHECK-GI-NEXT:    mov v7.h[3], v3.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v4.8h
-; CHECK-GI-NEXT:    xtn v1.8b, v5.8h
-; CHECK-GI-NEXT:    xtn v2.8b, v6.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-GI-NEXT:    uzp1 v1.8b, v1.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
-; CHECK-GI-NEXT:    xtn v1.8b, v7.8h
+; CHECK-GI-NEXT:    uzp1 v2.8b, v2.8b, v0.8b
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    uzp1 v1.8b, v3.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s2
 ; CHECK-GI-NEXT:    mov v0.s[2], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
@@ -291,29 +254,17 @@ define <8 x i16> @concat_v8s16_v2s16_reg(<2 x i16> %A, <2 x i16> %B, <2 x i16> %
 ;
 ; CHECK-GI-LABEL: concat_v8s16_v2s16_reg:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    mov v4.s[0], v0.s[0]
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT:    mov v5.s[0], v1.s[0]
-; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    mov v4.s[1], v0.s[1]
-; CHECK-GI-NEXT:    mov v5.s[1], v1.s[1]
-; CHECK-GI-NEXT:    mov v1.s[0], v2.s[0]
-; CHECK-GI-NEXT:    xtn v0.4h, v4.4s
-; CHECK-GI-NEXT:    xtn v4.4h, v5.4s
-; CHECK-GI-NEXT:    mov v1.s[1], v2.s[1]
-; CHECK-GI-NEXT:    mov v2.s[0], v3.s[0]
+; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT:    uzp1 v1.4h, v1.4h, v0.4h
 ; CHECK-GI-NEXT:    fmov w8, s0
-; CHECK-GI-NEXT:    xtn v1.4h, v1.4s
-; CHECK-GI-NEXT:    mov v2.s[1], v3.s[1]
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
-; CHECK-GI-NEXT:    fmov w8, s4
-; CHECK-GI-NEXT:    xtn v2.4h, v2.4s
-; CHECK-GI-NEXT:    mov v0.s[1], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
-; CHECK-GI-NEXT:    mov v0.s[2], w8
+; CHECK-GI-NEXT:    uzp1 v2.4h, v2.4h, v0.4h
+; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    uzp1 v1.4h, v3.4h, v0.4h
 ; CHECK-GI-NEXT:    fmov w8, s2
+; CHECK-GI-NEXT:    mov v0.s[2], w8
+; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.s[3], w8
 ; CHECK-GI-NEXT:    ret
     %b = shufflevector <2 x i16> %A, <2 x i16> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>

diff  --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll
index baab53d8bdbd46..66f26fc9d85973 100644
--- a/llvm/test/CodeGen/AArch64/fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/fcmp.ll
@@ -922,26 +922,27 @@ define <3 x i32> @v3f64_i32(<3 x double> %a, <3 x double> %b, <3 x i32> %d, <3 x
 ; CHECK-GI-LABEL: v3f64_i32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
 ; CHECK-GI-NEXT:    fcmp d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov v1.s[0], w8
 ; CHECK-GI-NEXT:    cset w9, mi
-; CHECK-GI-NEXT:    mov v2.d[0], x9
+; CHECK-GI-NEXT:    mov v2.s[0], w9
 ; CHECK-GI-NEXT:    mov w9, #-1 // =0xffffffff
 ; CHECK-GI-NEXT:    fcmgt v0.2d, v3.2d, v0.2d
 ; CHECK-GI-NEXT:    mov v1.s[1], w8
 ; CHECK-GI-NEXT:    mov v3.s[0], w9
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
 ; CHECK-GI-NEXT:    mov v1.s[2], w8
-; CHECK-GI-NEXT:    uzp1 v0.4s, v0.4s, v2.4s
 ; CHECK-GI-NEXT:    mov v3.s[1], w9
+; CHECK-GI-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-GI-NEXT:    mov v3.s[2], w9
 ; CHECK-GI-NEXT:    ushl v0.4s, v0.4s, v1.4s
 ; CHECK-GI-NEXT:    neg v1.4s, v1.4s
-; CHECK-GI-NEXT:    mov v3.s[2], w9
 ; CHECK-GI-NEXT:    sshl v0.4s, v0.4s, v1.4s
 ; CHECK-GI-NEXT:    eor v1.16b, v0.16b, v3.16b
 ; CHECK-GI-NEXT:    and v0.16b, v6.16b, v0.16b

diff  --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index c5bde81ba4a5ea..81c1a64f2d434f 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -7937,10 +7937,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: stofp_v2i8_v2f16:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT:    mov v1.s[0], v0.s[0]
-; CHECK-GI-FP16-NEXT:    mov v1.s[1], v0.s[1]
-; CHECK-GI-FP16-NEXT:    xtn v0.4h, v1.4s
+; CHECK-GI-FP16-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
 ; CHECK-GI-FP16-NEXT:    shl v0.4h, v0.4h, #8
 ; CHECK-GI-FP16-NEXT:    sshr v0.4h, v0.4h, #8
 ; CHECK-GI-FP16-NEXT:    scvtf v0.4h, v0.4h

diff  --git a/llvm/test/CodeGen/AArch64/mul.ll b/llvm/test/CodeGen/AArch64/mul.ll
index 9e748c9641aa8c..5e7f71c18c27a0 100644
--- a/llvm/test/CodeGen/AArch64/mul.ll
+++ b/llvm/test/CodeGen/AArch64/mul.ll
@@ -183,11 +183,7 @@ define void @v4i8(ptr %p1, ptr %p2) {
 ; CHECK-GI-NEXT:    ushll v0.8h, v3.8b, #0
 ; CHECK-GI-NEXT:    ushll v1.8h, v5.8b, #0
 ; CHECK-GI-NEXT:    mul v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
-; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v1.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    str w8, [x0]
 ; CHECK-GI-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sub.ll b/llvm/test/CodeGen/AArch64/sub.ll
index 8e7586bd4843c7..c298e6d8a1ff2a 100644
--- a/llvm/test/CodeGen/AArch64/sub.ll
+++ b/llvm/test/CodeGen/AArch64/sub.ll
@@ -171,11 +171,7 @@ define void @v4i8(ptr %p1, ptr %p2) {
 ; CHECK-GI-NEXT:    ushll v0.8h, v3.8b, #0
 ; CHECK-GI-NEXT:    ushll v1.8h, v5.8b, #0
 ; CHECK-GI-NEXT:    sub v0.4h, v0.4h, v1.4h
-; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
-; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
-; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
-; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
-; CHECK-GI-NEXT:    xtn v0.8b, v1.8h
+; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    str w8, [x0]
 ; CHECK-GI-NEXT:    ret


        


More information about the llvm-commits mailing list