[llvm] [AArch64][GlobalISel] Fold buildvector of bitcast (PR #141553)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon May 26 23:46:53 PDT 2025
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/141553
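This adds a combine for a G_BUILD_VECTOR whose operands are the results of unmerging bitcast scalars (optionally padded with undef elements), sinking the bitcast below the build vector and generating a G_BUILD_VECTOR from the original scalar type.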
```
%5:_(<4 x s8>) = G_BITCAST %16:_(s32)
%18:_(s8), %19:_(s8), %20:_(s8), %21:_(s8) = G_UNMERGE_VALUES %5:_(<4 x s8>)
%22:_(s8) = G_IMPLICIT_DEF
%23:_(<8 x s8>) = G_BUILD_VECTOR %18:_(s8), %19:_(s8), %20:_(s8), %21:_(s8), %22:_(s8), %22:_(s8), %22:_(s8), %22:_(s8)
=>
%undef:_(s32) = G_IMPLICIT_DEF
%bv:_(<2 x s32>) = G_BUILD_VECTOR %16:_(s32), %undef:_(s32)
%23:_(<8 x s8>) = G_BITCAST %bv:_(<2 x s32>)
```
It helps clean up some of the inefficiencies from widening scalar types.
From dbe147519384c4c453cf9d442cc784dec7ee7670 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 27 May 2025 07:44:09 +0100
Subject: [PATCH] [AArch64][GlobalISel] Fold buildvector of bitcast
This adds a combine for buildvectors from bitcast values, sinking the bitcast
and generating a buildvector from the original scalar type.
%5:_(<4 x s8>) = G_BITCAST %16:_(s32)
%18:_(s8), %19:_(s8), %20:_(s8), %21:_(s8) = G_UNMERGE_VALUES %5:_(<4 x s8>)
%22:_(s8) = G_IMPLICIT_DEF
%23:_(<8 x s8>) = G_BUILD_VECTOR %18:_(s8), %19:_(s8), %20:_(s8), %21:_(s8), %22:_(s8), %22:_(s8), %22:_(s8), %22:_(s8)
=>
%undef:_(s32) = G_IMPLICIT_DEF
%bv:_(<2 x s32>) = G_BUILD_VECTOR %16:_(s32), %undef:_(s32)
%23:_(<8 x s8>) = G_BITCAST %bv:_(<2 x s32>)
It helps clean up some of the inefficiencies from widening scalar types.
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 7 ++
.../include/llvm/Target/GlobalISel/Combine.td | 7 ++
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 66 ++++++++++++++++
llvm/lib/Target/AArch64/AArch64Combine.td | 2 +-
llvm/test/CodeGen/AArch64/add.ll | 26 +------
llvm/test/CodeGen/AArch64/andorxor.ll | 78 +++----------------
llvm/test/CodeGen/AArch64/bitcast-extend.ll | 22 +-----
llvm/test/CodeGen/AArch64/bitcast.ll | 17 +---
llvm/test/CodeGen/AArch64/ctlz.ll | 12 +--
llvm/test/CodeGen/AArch64/ctpop.ll | 12 +--
llvm/test/CodeGen/AArch64/cttz.ll | 25 ++----
llvm/test/CodeGen/AArch64/mul.ll | 26 +------
llvm/test/CodeGen/AArch64/neon-dotreduce.ll | 12 +--
llvm/test/CodeGen/AArch64/sadd_sat_vec.ll | 22 +-----
llvm/test/CodeGen/AArch64/ssub_sat_vec.ll | 22 +-----
llvm/test/CodeGen/AArch64/sub.ll | 26 +------
llvm/test/CodeGen/AArch64/uadd_sat_vec.ll | 22 +-----
llvm/test/CodeGen/AArch64/usub_sat_vec.ll | 22 +-----
18 files changed, 138 insertions(+), 288 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index c15263e0b06f8..bfb088dcd7acb 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -265,6 +265,13 @@ class CombinerHelper {
bool matchCombineShuffleToBuildVector(MachineInstr &MI) const;
void applyCombineShuffleToBuildVector(MachineInstr &MI) const;
+ /// Combine G_BUILD_VECTOR(G_UNMERGE(G_BITCAST), Undef) to
+ /// G_BITCAST(G_BUILD_VECTOR(..))
+ bool matchCombineBuildVectorOfBitcast(MachineInstr &MI,
+ SmallVector<Register> &Ops) const;
+ void applyCombineBuildVectorOfBitcast(MachineInstr &MI,
+ SmallVector<Register> &Ops) const;
+
/// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
/// Returns true if MI changed.
///
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index efd88524a159e..ea55f1341b68c 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1579,6 +1579,13 @@ def combine_shuffle_vector_to_build_vector : GICombineRule<
[{ return Helper.matchCombineShuffleToBuildVector(*${root}); }]),
(apply [{ Helper.applyCombineShuffleToBuildVector(*${root}); }])>;
+// Combine G_BUILD_VECTOR(G_UNMERGE(G_BITCAST), undef) to G_BITCAST(G_BUILD_VECTOR)
+def combine_build_vector_of_bitcast : GICombineRule<
+ (defs root:$root, concat_matchinfo:$matchinfo),
+ (match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
+ [{ return Helper.matchCombineBuildVectorOfBitcast(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyCombineBuildVectorOfBitcast(*${root}, ${matchinfo}); }])>;
+
def insert_vector_element_idx_undef : GICombineRule<
(defs root:$root),
(match (G_IMPLICIT_DEF $idx),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b1e851183de0d..3e8044e178e05 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -386,6 +386,72 @@ void CombinerHelper::applyCombineConcatVectors(
MI.eraseFromParent();
}
+bool CombinerHelper::matchCombineBuildVectorOfBitcast(
+ MachineInstr &MI, SmallVector<Register> &Ops) const {
+ assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR &&
+ "Invalid instruction");
+
+ GUnmerge *Unmerge =
+ dyn_cast<GUnmerge>(MRI.getVRegDef(MI.getOperand(1).getReg()));
+ if (!Unmerge || Unmerge->getReg(0) != MI.getOperand(1).getReg())
+ return false;
+ MachineInstr *BC = MRI.getVRegDef(Unmerge->getSourceReg());
+ if (BC->getOpcode() != TargetOpcode::G_BITCAST)
+ return false;
+ LLT InputTy = MRI.getType(BC->getOperand(1).getReg());
+ unsigned Factor = Unmerge->getNumDefs();
+ if (!InputTy.isScalar() || (MI.getNumOperands() - 1) % Factor != 0)
+ return false;
+
+ // Check if the build_vector is legal
+ LLT BVDstTy = LLT::fixed_vector((MI.getNumOperands() - 1) / Factor, InputTy);
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_BUILD_VECTOR, {BVDstTy, InputTy}}))
+ return false;
+
+ for (unsigned Idx = 0; Idx < MI.getNumOperands() - 1; Idx += Factor) {
+ GUnmerge *Unmerge =
+ dyn_cast<GUnmerge>(MRI.getVRegDef(MI.getOperand(Idx + 1).getReg()));
+ if (!all_of(iota_range<unsigned>(0, Factor, false), [&](unsigned J) {
+ MachineInstr *Src =
+ MRI.getVRegDef(MI.getOperand(Idx + J + 1).getReg());
+ if (Src->getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
+ return true;
+ return Unmerge &&
+ MI.getOperand(Idx + J + 1).getReg() == Unmerge->getReg(J);
+ }))
+ return false;
+ if (!Unmerge)
+ Ops.push_back(0);
+ else {
+ MachineInstr *BC = MRI.getVRegDef(Unmerge->getSourceReg());
+ if (BC->getOpcode() != TargetOpcode::G_BITCAST ||
+ MRI.getType(BC->getOperand(1).getReg()) != InputTy)
+ return false;
+ Ops.push_back(BC->getOperand(1).getReg());
+ }
+ }
+
+ return true;
+}
+void CombinerHelper::applyCombineBuildVectorOfBitcast(
+ MachineInstr &MI, SmallVector<Register> &Ops) const {
+ LLT SrcTy = MRI.getType(Ops[0]);
+ Register Undef = 0;
+ for (Register &Op : Ops) {
+ if (!Op) {
+ if (!Undef)
+ Undef = Builder.buildUndef(SrcTy).getReg(0);
+ Op = Undef;
+ }
+ }
+
+ LLT BVDstTy = LLT::fixed_vector(Ops.size(), SrcTy);
+ auto BV = Builder.buildBuildVector(BVDstTy, Ops);
+ Builder.buildBitcast(MI.getOperand(0).getReg(), BV);
+ MI.eraseFromParent();
+}
+
bool CombinerHelper::matchCombineShuffleToBuildVector(MachineInstr &MI) const {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
"Invalid instruction");
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index f84e83816bf33..90a8ba32d50f5 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -344,7 +344,7 @@ def AArch64PostLegalizerCombiner
constant_fold_binops, identity_combines,
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
- select_to_minmax, or_to_bsp, combine_concat_vector,
+ select_to_minmax, or_to_bsp, combine_concat_vector, combine_build_vector_of_bitcast,
commute_constant_to_rhs,
push_freeze_to_prevent_poison_from_propagating,
combine_mul_cmlt, combine_use_vector_truncate, extmultomull]> {
diff --git a/llvm/test/CodeGen/AArch64/add.ll b/llvm/test/CodeGen/AArch64/add.ll
index d5bd1b712a2a6..689c4b9c516c0 100644
--- a/llvm/test/CodeGen/AArch64/add.ll
+++ b/llvm/test/CodeGen/AArch64/add.ll
@@ -149,28 +149,10 @@ define void @v4i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov b3, v1.b[1]
-; CHECK-GI-NEXT: mov b4, v0.b[2]
-; CHECK-GI-NEXT: mov b5, v0.b[3]
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov b2, v1.b[2]
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov b3, v1.b[3]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v1.h[1], w9
-; CHECK-GI-NEXT: fmov w8, s4
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mov v0.h[2], w8
-; CHECK-GI-NEXT: mov v1.h[2], w9
-; CHECK-GI-NEXT: fmov w8, s5
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov v0.h[3], w8
-; CHECK-GI-NEXT: mov v1.h[3], w9
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr s1, [x1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w8, s0
diff --git a/llvm/test/CodeGen/AArch64/andorxor.ll b/llvm/test/CodeGen/AArch64/andorxor.ll
index f7df1092287bd..04a350b3fc666 100644
--- a/llvm/test/CodeGen/AArch64/andorxor.ll
+++ b/llvm/test/CodeGen/AArch64/andorxor.ll
@@ -439,28 +439,10 @@ define void @and_v4i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: and_v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov b3, v1.b[1]
-; CHECK-GI-NEXT: mov b4, v0.b[2]
-; CHECK-GI-NEXT: mov b5, v0.b[3]
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov b2, v1.b[2]
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov b3, v1.b[3]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v1.h[1], w9
-; CHECK-GI-NEXT: fmov w8, s4
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mov v0.h[2], w8
-; CHECK-GI-NEXT: mov v1.h[2], w9
-; CHECK-GI-NEXT: fmov w8, s5
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov v0.h[3], w8
-; CHECK-GI-NEXT: mov v1.h[3], w9
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr s1, [x1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w8, s0
@@ -488,28 +470,10 @@ define void @or_v4i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: or_v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov b3, v1.b[1]
-; CHECK-GI-NEXT: mov b4, v0.b[2]
-; CHECK-GI-NEXT: mov b5, v0.b[3]
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov b2, v1.b[2]
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov b3, v1.b[3]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v1.h[1], w9
-; CHECK-GI-NEXT: fmov w8, s4
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mov v0.h[2], w8
-; CHECK-GI-NEXT: mov v1.h[2], w9
-; CHECK-GI-NEXT: fmov w8, s5
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov v0.h[3], w8
-; CHECK-GI-NEXT: mov v1.h[3], w9
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr s1, [x1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w8, s0
@@ -537,28 +501,10 @@ define void @xor_v4i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: xor_v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov b3, v1.b[1]
-; CHECK-GI-NEXT: mov b4, v0.b[2]
-; CHECK-GI-NEXT: mov b5, v0.b[3]
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov b2, v1.b[2]
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov b3, v1.b[3]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v1.h[1], w9
-; CHECK-GI-NEXT: fmov w8, s4
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mov v0.h[2], w8
-; CHECK-GI-NEXT: mov v1.h[2], w9
-; CHECK-GI-NEXT: fmov w8, s5
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov v0.h[3], w8
-; CHECK-GI-NEXT: mov v1.h[3], w9
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr s1, [x1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w8, s0
diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
index 3133d0efb4b9b..47cb168065bb5 100644
--- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
@@ -12,15 +12,8 @@ define <4 x i16> @z_i32_v4i16(i32 %x) {
;
; CHECK-GI-LABEL: z_i32_v4i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fmov s0, w0
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[2]
-; CHECK-GI-NEXT: mov b0, v0.b[3]
-; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
-; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
-; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
-; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0
+; CHECK-GI-NEXT: mov v0.s[0], w0
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%b = bitcast i32 %x to <4 x i8>
@@ -115,15 +108,8 @@ define <4 x i16> @s_i32_v4i16(i32 %x) {
;
; CHECK-GI-LABEL: s_i32_v4i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fmov s0, w0
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[2]
-; CHECK-GI-NEXT: mov b0, v0.b[3]
-; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
-; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
-; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
-; CHECK-GI-NEXT: sshll v0.8h, v2.8b, #0
+; CHECK-GI-NEXT: mov v0.s[0], w0
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%b = bitcast i32 %x to <4 x i8>
diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll
index d54cc4adb81b3..442471951bab5 100644
--- a/llvm/test/CodeGen/AArch64/bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast.ll
@@ -79,16 +79,8 @@ define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){
; CHECK-GI-LABEL: bitcast_i32_v4i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add w8, w0, w1
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: fmov w8, s1
-; CHECK-GI-NEXT: mov b1, v0.b[3]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov v0.h[2], w8
-; CHECK-GI-NEXT: fmov w8, s1
-; CHECK-GI-NEXT: mov v0.h[3], w8
+; CHECK-GI-NEXT: mov v0.s[0], w8
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%c = add i32 %a, %b
@@ -131,11 +123,8 @@ define <2 x i16> @bitcast_i32_v2i16(i32 %a, i32 %b){
; CHECK-GI-LABEL: bitcast_i32_v2i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add w8, w0, w1
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: mov v0.s[0], w8
-; CHECK-GI-NEXT: fmov w8, s1
-; CHECK-GI-NEXT: mov v0.s[1], w8
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%c = add i32 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/ctlz.ll b/llvm/test/CodeGen/AArch64/ctlz.ll
index a4863d1f74200..512095863e0aa 100644
--- a/llvm/test/CodeGen/AArch64/ctlz.ll
+++ b/llvm/test/CodeGen/AArch64/ctlz.ll
@@ -87,16 +87,8 @@ define void @v4i8(ptr %p1) {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[2]
-; CHECK-GI-NEXT: mov b0, v0.b[3]
-; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
-; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
-; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
-; CHECK-GI-NEXT: clz v0.8b, v2.8b
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: clz v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: str w8, [x0]
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/ctpop.ll b/llvm/test/CodeGen/AArch64/ctpop.ll
index 55f75b6bc3f27..356e668d050c9 100644
--- a/llvm/test/CodeGen/AArch64/ctpop.ll
+++ b/llvm/test/CodeGen/AArch64/ctpop.ll
@@ -85,16 +85,8 @@ define void @v4i8(ptr %p1) {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[2]
-; CHECK-GI-NEXT: mov b0, v0.b[3]
-; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
-; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
-; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
-; CHECK-GI-NEXT: cnt v0.8b, v2.8b
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: cnt v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: str w8, [x0]
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/cttz.ll b/llvm/test/CodeGen/AArch64/cttz.ll
index 60125f8a19811..60b589885c111 100644
--- a/llvm/test/CodeGen/AArch64/cttz.ll
+++ b/llvm/test/CodeGen/AArch64/cttz.ll
@@ -114,24 +114,15 @@ define void @v4i8(ptr %p1) {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w9, [x0]
; CHECK-GI-NEXT: mov w8, #255 // =0xff
-; CHECK-GI-NEXT: fmov s0, w9
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: mov b3, v0.b[3]
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov s1, w8
-; CHECK-GI-NEXT: mov v0.h[1], w9
-; CHECK-GI-NEXT: mov v1.h[1], w8
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mov v0.h[2], w9
-; CHECK-GI-NEXT: mov v1.h[2], w8
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov v0.h[3], w9
-; CHECK-GI-NEXT: mov v1.h[3], w8
-; CHECK-GI-NEXT: eor v2.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-GI-NEXT: ldr s1, [x0]
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: mov v0.h[1], w8
+; CHECK-GI-NEXT: mov v0.h[2], w8
+; CHECK-GI-NEXT: mov v0.h[3], w8
+; CHECK-GI-NEXT: eor v2.8b, v1.8b, v0.8b
+; CHECK-GI-NEXT: add v0.4h, v1.4h, v0.4h
; CHECK-GI-NEXT: and v0.8b, v2.8b, v0.8b
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: cnt v0.8b, v0.8b
diff --git a/llvm/test/CodeGen/AArch64/mul.ll b/llvm/test/CodeGen/AArch64/mul.ll
index 8d9a6e6b92914..0109c716737dd 100644
--- a/llvm/test/CodeGen/AArch64/mul.ll
+++ b/llvm/test/CodeGen/AArch64/mul.ll
@@ -161,28 +161,10 @@ define void @v4i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov b3, v1.b[1]
-; CHECK-GI-NEXT: mov b4, v0.b[2]
-; CHECK-GI-NEXT: mov b5, v0.b[3]
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov b2, v1.b[2]
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov b3, v1.b[3]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v1.h[1], w9
-; CHECK-GI-NEXT: fmov w8, s4
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mov v0.h[2], w8
-; CHECK-GI-NEXT: mov v1.h[2], w9
-; CHECK-GI-NEXT: fmov w8, s5
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov v0.h[3], w8
-; CHECK-GI-NEXT: mov v1.h[3], w9
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr s1, [x1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: mul v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w8, s0
diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
index 2d3fda704908e..daae69fd4a949 100644
--- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
+++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll
@@ -93,16 +93,8 @@ define i32 @test_udot_v4i8_nomla(ptr nocapture readonly %a1) {
;
; CHECK-GI-LABEL: test_udot_v4i8_nomla:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b3, v0.b[2]
-; CHECK-GI-NEXT: mov b0, v0.b[3]
-; CHECK-GI-NEXT: mov v2.b[1], v1.b[0]
-; CHECK-GI-NEXT: mov v2.b[2], v3.b[0]
-; CHECK-GI-NEXT: mov v2.b[3], v0.b[0]
-; CHECK-GI-NEXT: ushll v0.8h, v2.8b, #0
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: uaddlv s0, v0.4h
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: and w0, w8, #0xffff
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
index e1018bbee7893..af205a8352ad2 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
@@ -158,25 +158,9 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov v3.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b4, v1.b[1]
-; CHECK-GI-NEXT: mov v5.b[0], v1.b[0]
-; CHECK-GI-NEXT: mov v3.b[1], v2.b[0]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: mov b0, v0.b[3]
-; CHECK-GI-NEXT: mov v5.b[1], v4.b[0]
-; CHECK-GI-NEXT: mov b4, v1.b[2]
-; CHECK-GI-NEXT: mov b1, v1.b[3]
-; CHECK-GI-NEXT: mov v3.b[2], v2.b[0]
-; CHECK-GI-NEXT: mov v5.b[2], v4.b[0]
-; CHECK-GI-NEXT: mov v3.b[3], v0.b[0]
-; CHECK-GI-NEXT: mov v5.b[3], v1.b[0]
-; CHECK-GI-NEXT: sqadd v0.8b, v3.8b, v5.8b
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr s1, [x1]
+; CHECK-GI-NEXT: sqadd v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: str w8, [x2]
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
index 085857c0c5428..0da776c12ac9e 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
@@ -159,25 +159,9 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov v3.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b4, v1.b[1]
-; CHECK-GI-NEXT: mov v5.b[0], v1.b[0]
-; CHECK-GI-NEXT: mov v3.b[1], v2.b[0]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: mov b0, v0.b[3]
-; CHECK-GI-NEXT: mov v5.b[1], v4.b[0]
-; CHECK-GI-NEXT: mov b4, v1.b[2]
-; CHECK-GI-NEXT: mov b1, v1.b[3]
-; CHECK-GI-NEXT: mov v3.b[2], v2.b[0]
-; CHECK-GI-NEXT: mov v5.b[2], v4.b[0]
-; CHECK-GI-NEXT: mov v3.b[3], v0.b[0]
-; CHECK-GI-NEXT: mov v5.b[3], v1.b[0]
-; CHECK-GI-NEXT: sqsub v0.8b, v3.8b, v5.8b
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr s1, [x1]
+; CHECK-GI-NEXT: sqsub v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: str w8, [x2]
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sub.ll b/llvm/test/CodeGen/AArch64/sub.ll
index 7a436eddb23a6..a1135a94509de 100644
--- a/llvm/test/CodeGen/AArch64/sub.ll
+++ b/llvm/test/CodeGen/AArch64/sub.ll
@@ -149,28 +149,10 @@ define void @v4i8(ptr %p1, ptr %p2) {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov b3, v1.b[1]
-; CHECK-GI-NEXT: mov b4, v0.b[2]
-; CHECK-GI-NEXT: mov b5, v0.b[3]
-; CHECK-GI-NEXT: fmov w8, s2
-; CHECK-GI-NEXT: mov b2, v1.b[2]
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov b3, v1.b[3]
-; CHECK-GI-NEXT: mov v0.h[1], w8
-; CHECK-GI-NEXT: mov v1.h[1], w9
-; CHECK-GI-NEXT: fmov w8, s4
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mov v0.h[2], w8
-; CHECK-GI-NEXT: mov v1.h[2], w9
-; CHECK-GI-NEXT: fmov w8, s5
-; CHECK-GI-NEXT: fmov w9, s3
-; CHECK-GI-NEXT: mov v0.h[3], w8
-; CHECK-GI-NEXT: mov v1.h[3], w9
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr s1, [x1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: sub v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w8, s0
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
index b0b3198fda0e6..9135d785fba7e 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -155,25 +155,9 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov v3.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b4, v1.b[1]
-; CHECK-GI-NEXT: mov v5.b[0], v1.b[0]
-; CHECK-GI-NEXT: mov v3.b[1], v2.b[0]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: mov b0, v0.b[3]
-; CHECK-GI-NEXT: mov v5.b[1], v4.b[0]
-; CHECK-GI-NEXT: mov b4, v1.b[2]
-; CHECK-GI-NEXT: mov b1, v1.b[3]
-; CHECK-GI-NEXT: mov v3.b[2], v2.b[0]
-; CHECK-GI-NEXT: mov v5.b[2], v4.b[0]
-; CHECK-GI-NEXT: mov v3.b[3], v0.b[0]
-; CHECK-GI-NEXT: mov v5.b[3], v1.b[0]
-; CHECK-GI-NEXT: uqadd v0.8b, v3.8b, v5.8b
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr s1, [x1]
+; CHECK-GI-NEXT: uqadd v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: str w8, [x2]
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
index 54754e7fbaed6..cdbb8623b9b7c 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
@@ -156,25 +156,9 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind {
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr w8, [x0]
-; CHECK-GI-NEXT: ldr w9, [x1]
-; CHECK-GI-NEXT: fmov s0, w8
-; CHECK-GI-NEXT: fmov s1, w9
-; CHECK-GI-NEXT: mov b2, v0.b[1]
-; CHECK-GI-NEXT: mov v3.b[0], v0.b[0]
-; CHECK-GI-NEXT: mov b4, v1.b[1]
-; CHECK-GI-NEXT: mov v5.b[0], v1.b[0]
-; CHECK-GI-NEXT: mov v3.b[1], v2.b[0]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: mov b0, v0.b[3]
-; CHECK-GI-NEXT: mov v5.b[1], v4.b[0]
-; CHECK-GI-NEXT: mov b4, v1.b[2]
-; CHECK-GI-NEXT: mov b1, v1.b[3]
-; CHECK-GI-NEXT: mov v3.b[2], v2.b[0]
-; CHECK-GI-NEXT: mov v5.b[2], v4.b[0]
-; CHECK-GI-NEXT: mov v3.b[3], v0.b[0]
-; CHECK-GI-NEXT: mov v5.b[3], v1.b[0]
-; CHECK-GI-NEXT: uqsub v0.8b, v3.8b, v5.8b
+; CHECK-GI-NEXT: ldr s0, [x0]
+; CHECK-GI-NEXT: ldr s1, [x1]
+; CHECK-GI-NEXT: uqsub v0.8b, v0.8b, v1.8b
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: str w8, [x2]
; CHECK-GI-NEXT: ret
More information about the llvm-commits
mailing list