[llvm] [AArch64][GlobalISel] Lower scalarizing G_UNMERGE_VALUES to G_EXTRACT_VECTOR_ELT (PR #75662)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 15 13:51:47 PST 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: David Green (davemgreen)
Changes:
This adds a post-legalizer lowering of G_UNMERGE_VALUES instructions that take a vector and produce one scalar value per lane. Each such instruction is converted to one G_EXTRACT_VECTOR_ELT per lane, allowing all the existing tablegen patterns to apply to them.
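For example, an unmerge that splits a <4 x s16> vector into four s16 scalars becomes four indexed extracts (a minimal generic-MIR sketch; register names are illustrative, not taken from the patch):

```
; Before: one scalar result per lane of the vector source.
%e0:_(s16), %e1:_(s16), %e2:_(s16), %e3:_(s16) = G_UNMERGE_VALUES %v(<4 x s16>)

; After: one G_EXTRACT_VECTOR_ELT per lane, each with a constant lane index.
%c0:_(s64) = G_CONSTANT i64 0
%e0:_(s16) = G_EXTRACT_VECTOR_ELT %v(<4 x s16>), %c0(s64)
%c1:_(s64) = G_CONSTANT i64 1
%e1:_(s16) = G_EXTRACT_VECTOR_ELT %v(<4 x s16>), %c1(s64)
%c2:_(s64) = G_CONSTANT i64 2
%e2:_(s16) = G_EXTRACT_VECTOR_ELT %v(<4 x s16>), %c2(s64)
%c3:_(s64) = G_CONSTANT i64 3
%e3:_(s16) = G_EXTRACT_VECTOR_ELT %v(<4 x s16>), %c3(s64)
```

As the test diffs below show, these extracts can then select directly to umov/smov lane moves instead of per-lane FPR-to-GPR fmov transfers.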
A couple of tablegen patterns need to be altered to make sure the type of the constant lane-index operand is known; otherwise the patterns are not recognized under GlobalISel.
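Concretely, the change replaces a bare lane index, whose type SelectionDAG can infer but the GlobalISel pattern importer cannot, with an explicitly typed one (a sketch of the shape of the change; the full hunks are in the AArch64InstrInfo.td diff below):

```
// Untyped index: fine for SelectionDAG, but skipped by the GlobalISel
// pattern importer because the operand type is unknown.
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, 0)))),
          (EXTRACT_SUBREG V128:$src, dsub)>;

// Typed index: recognized by both selectors.
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))),
          (EXTRACT_SUBREG V128:$src, dsub)>;
```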
---
Patch is 78.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75662.diff
14 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64Combine.td (+10-1)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+6-6)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp (+22)
- (modified) llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll (+76-100)
- (modified) llvm/test/CodeGen/AArch64/aarch64-mulv.ll (+60-95)
- (modified) llvm/test/CodeGen/AArch64/aarch64-smull.ll (+15-3)
- (modified) llvm/test/CodeGen/AArch64/fptoi.ll (+6-10)
- (modified) llvm/test/CodeGen/AArch64/reduce-and.ll (+118-191)
- (modified) llvm/test/CodeGen/AArch64/reduce-or.ll (+118-191)
- (modified) llvm/test/CodeGen/AArch64/reduce-xor.ll (+118-191)
- (modified) llvm/test/CodeGen/AArch64/sext.ll (+14-28)
- (modified) llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll (+37-61)
- (modified) llvm/test/CodeGen/AArch64/xtn.ll (+7-17)
- (modified) llvm/test/CodeGen/AArch64/zext.ll (+16-25)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index d6c00be80bd9c2..99f256b887821b 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -134,6 +134,14 @@ def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
form_duplane,
shuf_to_ins]>;
+// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
+def vector_unmerge_lowering : GICombineRule <
+ (defs root:$root),
+ (match (wip_match_opcode G_UNMERGE_VALUES):$root,
+ [{ return matchScalarizeVectorUnmerge(*${root}, MRI); }]),
+ (apply [{ applyScalarizeVectorUnmerge(*${root}, MRI, B); }])
+>;
+
def adjust_icmp_imm_matchdata :
GIDefMatchData<"std::pair<uint64_t, CmpInst::Predicate>">;
def adjust_icmp_imm : GICombineRule <
@@ -251,7 +259,8 @@ def AArch64PostLegalizerLowering
icmp_lowering, build_vector_lowering,
lower_vector_fcmp, form_truncstore,
vector_sext_inreg_to_shift,
- unmerge_ext_to_unmerge, lower_mull]> {
+ unmerge_ext_to_unmerge, lower_mull,
+ vector_unmerge_lowering]> {
}
// Post-legalization combines which are primarily optimizations.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 44b0337fe78791..5e60bb80e7fec4 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6480,23 +6480,23 @@ def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))),
// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
(EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
-def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, 0)))),
+def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))),
(EXTRACT_SUBREG V128:$src, ssub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
(EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
-def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, 0)))),
+def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))),
(EXTRACT_SUBREG V128:$src, dsub)>;
// Floating point vector extractions are codegen'd as either a sequence of
// subregister extractions, or a MOV (aka DUP here) if
// the lane number is anything other than zero.
-def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
+def : Pat<(f64 (vector_extract (v2f64 V128:$Rn), (i64 0))),
(f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
-def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
+def : Pat<(f32 (vector_extract (v4f32 V128:$Rn), (i64 0))),
(f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
-def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
+def : Pat<(f16 (vector_extract (v8f16 V128:$Rn), (i64 0))),
(f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
-def : Pat<(vector_extract (v8bf16 V128:$Rn), 0),
+def : Pat<(bf16 (vector_extract (v8bf16 V128:$Rn), (i64 0))),
(bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 687063873a16b2..ef0513cfee5131 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -769,6 +769,28 @@ void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
MI.eraseFromParent();
}
+bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
+ Register Src1Reg = MI.getOperand(MI.getNumOperands() - 1).getReg();
+ const LLT SrcTy = MRI.getType(Src1Reg);
+ return SrcTy.isVector() && !SrcTy.isScalable() &&
+ MI.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
+}
+
+void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) {
+ assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
+ Register Src1Reg = MI.getOperand(MI.getNumOperands() - 1).getReg();
+ const LLT SrcTy = MRI.getType(Src1Reg);
+ assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
+ "Expected a fixed length vector");
+
+ for (int I = 0; I < SrcTy.getNumElements(); ++I) {
+ B.buildExtractVectorElementConstant(MI.getOperand(I).getReg(), Src1Reg, I);
+ }
+ MI.eraseFromParent();
+}
+
bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
auto Splat = getAArch64VectorSplat(MI, MRI);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index 5e477e8947d1b8..194fe5be40c2bd 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -516,20 +516,17 @@ define i8 @sminv_v4i8(<4 x i8> %a) {
; CHECK-GI-LABEL: sminv_v4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov h3, v0.h[3]
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w12, v0.h[3]
+; CHECK-GI-NEXT: sxtb w11, w8
+; CHECK-GI-NEXT: cmp w11, w9, sxtb
+; CHECK-GI-NEXT: sxtb w11, w10
+; CHECK-GI-NEXT: csel w8, w8, w9, lt
+; CHECK-GI-NEXT: cmp w11, w12, sxtb
; CHECK-GI-NEXT: sxtb w9, w8
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
-; CHECK-GI-NEXT: cmp w9, w10, sxtb
-; CHECK-GI-NEXT: sxtb w9, w11
-; CHECK-GI-NEXT: csel w8, w8, w10, lt
-; CHECK-GI-NEXT: fmov w10, s3
-; CHECK-GI-NEXT: cmp w9, w10, sxtb
-; CHECK-GI-NEXT: sxtb w9, w8
-; CHECK-GI-NEXT: csel w10, w11, w10, lt
+; CHECK-GI-NEXT: csel w10, w10, w12, lt
; CHECK-GI-NEXT: cmp w9, w10, sxtb
; CHECK-GI-NEXT: csel w0, w8, w10, lt
; CHECK-GI-NEXT: ret
@@ -611,19 +608,16 @@ define i16 @sminv_v3i16(<3 x i16> %a) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s0
-; CHECK-GI-NEXT: sxth w8, w8
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
+; CHECK-GI-NEXT: smov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[0]
+; CHECK-GI-NEXT: umov w10, v0.h[1]
+; CHECK-GI-NEXT: smov w11, v0.h[2]
+; CHECK-GI-NEXT: umov w13, v0.h[2]
; CHECK-GI-NEXT: fmov w12, s1
-; CHECK-GI-NEXT: cmp w8, w10, sxth
-; CHECK-GI-NEXT: sxth w8, w11
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: csel w9, w9, w12, lt
-; CHECK-GI-NEXT: cmp w8, w9, sxth
-; CHECK-GI-NEXT: csel w0, w9, w10, gt
+; CHECK-GI-NEXT: cmp w8, w12, sxth
+; CHECK-GI-NEXT: csel w8, w9, w10, lt
+; CHECK-GI-NEXT: cmp w11, w8, sxth
+; CHECK-GI-NEXT: csel w0, w8, w13, gt
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i16 @llvm.vector.reduce.smin.v3i16(<3 x i16> %a)
@@ -887,20 +881,17 @@ define i8 @smaxv_v4i8(<4 x i8> %a) {
; CHECK-GI-LABEL: smaxv_v4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov h3, v0.h[3]
-; CHECK-GI-NEXT: sxtb w9, w8
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
-; CHECK-GI-NEXT: cmp w9, w10, sxtb
-; CHECK-GI-NEXT: sxtb w9, w11
-; CHECK-GI-NEXT: csel w8, w8, w10, gt
-; CHECK-GI-NEXT: fmov w10, s3
-; CHECK-GI-NEXT: cmp w9, w10, sxtb
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w12, v0.h[3]
+; CHECK-GI-NEXT: sxtb w11, w8
+; CHECK-GI-NEXT: cmp w11, w9, sxtb
+; CHECK-GI-NEXT: sxtb w11, w10
+; CHECK-GI-NEXT: csel w8, w8, w9, gt
+; CHECK-GI-NEXT: cmp w11, w12, sxtb
; CHECK-GI-NEXT: sxtb w9, w8
-; CHECK-GI-NEXT: csel w10, w11, w10, gt
+; CHECK-GI-NEXT: csel w10, w10, w12, gt
; CHECK-GI-NEXT: cmp w9, w10, sxtb
; CHECK-GI-NEXT: csel w0, w8, w10, gt
; CHECK-GI-NEXT: ret
@@ -982,19 +973,16 @@ define i16 @smaxv_v3i16(<3 x i16> %a) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s0
-; CHECK-GI-NEXT: sxth w8, w8
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
+; CHECK-GI-NEXT: smov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[0]
+; CHECK-GI-NEXT: umov w10, v0.h[1]
+; CHECK-GI-NEXT: smov w11, v0.h[2]
+; CHECK-GI-NEXT: umov w13, v0.h[2]
; CHECK-GI-NEXT: fmov w12, s1
-; CHECK-GI-NEXT: cmp w8, w10, sxth
-; CHECK-GI-NEXT: sxth w8, w11
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: csel w9, w9, w12, gt
-; CHECK-GI-NEXT: cmp w8, w9, sxth
-; CHECK-GI-NEXT: csel w0, w9, w10, lt
+; CHECK-GI-NEXT: cmp w8, w12, sxth
+; CHECK-GI-NEXT: csel w8, w9, w10, gt
+; CHECK-GI-NEXT: cmp w11, w8, sxth
+; CHECK-GI-NEXT: csel w0, w8, w13, lt
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i16 @llvm.vector.reduce.smax.v3i16(<3 x i16> %a)
@@ -1256,19 +1244,16 @@ define i8 @uminv_v4i8(<4 x i8> %a) {
; CHECK-GI-LABEL: uminv_v4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: mov h3, v0.h[3]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
-; CHECK-GI-NEXT: fmov w12, s3
-; CHECK-GI-NEXT: and w9, w8, #0xff
-; CHECK-GI-NEXT: cmp w9, w10, uxtb
-; CHECK-GI-NEXT: and w9, w11, #0xff
-; CHECK-GI-NEXT: csel w8, w8, w10, lo
-; CHECK-GI-NEXT: cmp w9, w12, uxtb
-; CHECK-GI-NEXT: csel w9, w11, w12, lo
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w11, v0.h[3]
+; CHECK-GI-NEXT: and w12, w8, #0xff
+; CHECK-GI-NEXT: cmp w12, w9, uxtb
+; CHECK-GI-NEXT: and w12, w10, #0xff
+; CHECK-GI-NEXT: csel w8, w8, w9, lo
+; CHECK-GI-NEXT: cmp w12, w11, uxtb
+; CHECK-GI-NEXT: csel w9, w10, w11, lo
; CHECK-GI-NEXT: and w10, w8, #0xff
; CHECK-GI-NEXT: cmp w10, w9, uxtb
; CHECK-GI-NEXT: csel w0, w8, w9, lo
@@ -1351,19 +1336,16 @@ define i16 @uminv_v3i16(<3 x i16> %a) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s0
-; CHECK-GI-NEXT: uxth w8, w8
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[0]
+; CHECK-GI-NEXT: umov w10, v0.h[1]
+; CHECK-GI-NEXT: umov w11, v0.h[2]
+; CHECK-GI-NEXT: umov w13, v0.h[2]
; CHECK-GI-NEXT: fmov w12, s1
-; CHECK-GI-NEXT: cmp w8, w10, uxth
-; CHECK-GI-NEXT: uxth w8, w11
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: csel w9, w9, w12, lo
-; CHECK-GI-NEXT: cmp w8, w9, uxth
-; CHECK-GI-NEXT: csel w0, w9, w10, hi
+; CHECK-GI-NEXT: cmp w8, w12, uxth
+; CHECK-GI-NEXT: csel w8, w9, w10, lo
+; CHECK-GI-NEXT: cmp w11, w8, uxth
+; CHECK-GI-NEXT: csel w0, w8, w13, hi
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i16 @llvm.vector.reduce.umin.v3i16(<3 x i16> %a)
@@ -1625,19 +1607,16 @@ define i8 @umaxv_v4i8(<4 x i8> %a) {
; CHECK-GI-LABEL: umaxv_v4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: mov h3, v0.h[3]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
-; CHECK-GI-NEXT: fmov w12, s3
-; CHECK-GI-NEXT: and w9, w8, #0xff
-; CHECK-GI-NEXT: cmp w9, w10, uxtb
-; CHECK-GI-NEXT: and w9, w11, #0xff
-; CHECK-GI-NEXT: csel w8, w8, w10, hi
-; CHECK-GI-NEXT: cmp w9, w12, uxtb
-; CHECK-GI-NEXT: csel w9, w11, w12, hi
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w11, v0.h[3]
+; CHECK-GI-NEXT: and w12, w8, #0xff
+; CHECK-GI-NEXT: cmp w12, w9, uxtb
+; CHECK-GI-NEXT: and w12, w10, #0xff
+; CHECK-GI-NEXT: csel w8, w8, w9, hi
+; CHECK-GI-NEXT: cmp w12, w11, uxtb
+; CHECK-GI-NEXT: csel w9, w10, w11, hi
; CHECK-GI-NEXT: and w10, w8, #0xff
; CHECK-GI-NEXT: cmp w10, w9, uxtb
; CHECK-GI-NEXT: csel w0, w8, w9, hi
@@ -1719,19 +1698,16 @@ define i16 @umaxv_v3i16(<3 x i16> %a) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s0
-; CHECK-GI-NEXT: uxth w8, w8
-; CHECK-GI-NEXT: fmov w10, s1
-; CHECK-GI-NEXT: fmov w11, s2
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[0]
+; CHECK-GI-NEXT: umov w10, v0.h[1]
+; CHECK-GI-NEXT: umov w11, v0.h[2]
+; CHECK-GI-NEXT: umov w13, v0.h[2]
; CHECK-GI-NEXT: fmov w12, s1
-; CHECK-GI-NEXT: cmp w8, w10, uxth
-; CHECK-GI-NEXT: uxth w8, w11
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: csel w9, w9, w12, hi
-; CHECK-GI-NEXT: cmp w8, w9, uxth
-; CHECK-GI-NEXT: csel w0, w9, w10, lo
+; CHECK-GI-NEXT: cmp w8, w12, uxth
+; CHECK-GI-NEXT: csel w8, w9, w10, hi
+; CHECK-GI-NEXT: cmp w11, w8, uxth
+; CHECK-GI-NEXT: csel w0, w8, w13, lo
; CHECK-GI-NEXT: ret
entry:
%arg1 = call i16 @llvm.vector.reduce.umax.v3i16(<3 x i16> %a)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mulv.ll b/llvm/test/CodeGen/AArch64/aarch64-mulv.ll
index 90f09379e68fd2..7b7ca9d8ffc2db 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mulv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mulv.ll
@@ -73,13 +73,10 @@ define i8 @mulv_v4i8(<4 x i8> %a) {
; CHECK-GI-LABEL: mulv_v4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: mov h3, v0.h[3]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: fmov w11, s3
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
+; CHECK-GI-NEXT: umov w11, v0.h[3]
; CHECK-GI-NEXT: mul w8, w8, w9
; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
@@ -113,27 +110,20 @@ define i8 @mulv_v8i8(<8 x i8> %a) {
; CHECK-GI-LABEL: mulv_v8i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: mov b3, v0.b[3]
-; CHECK-GI-NEXT: mov b4, v0.b[4]
-; CHECK-GI-NEXT: mov b5, v0.b[5]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov b6, v0.b[6]
-; CHECK-GI-NEXT: mov b7, v0.b[7]
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: fmov w11, s3
-; CHECK-GI-NEXT: fmov w12, s5
+; CHECK-GI-NEXT: umov w8, v0.b[0]
+; CHECK-GI-NEXT: umov w9, v0.b[1]
+; CHECK-GI-NEXT: umov w10, v0.b[2]
+; CHECK-GI-NEXT: umov w11, v0.b[3]
+; CHECK-GI-NEXT: umov w12, v0.b[4]
+; CHECK-GI-NEXT: umov w13, v0.b[5]
+; CHECK-GI-NEXT: umov w14, v0.b[6]
+; CHECK-GI-NEXT: umov w15, v0.b[7]
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: fmov w9, s4
-; CHECK-GI-NEXT: mul w10, w10, w11
-; CHECK-GI-NEXT: fmov w11, s6
-; CHECK-GI-NEXT: mul w9, w9, w12
-; CHECK-GI-NEXT: fmov w12, s7
-; CHECK-GI-NEXT: mul w8, w8, w10
-; CHECK-GI-NEXT: mul w11, w11, w12
-; CHECK-GI-NEXT: mul w9, w9, w11
+; CHECK-GI-NEXT: mul w9, w10, w11
+; CHECK-GI-NEXT: mul w10, w12, w13
+; CHECK-GI-NEXT: mul w11, w14, w15
+; CHECK-GI-NEXT: mul w8, w8, w9
+; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
; CHECK-GI-NEXT: ret
entry:
@@ -167,27 +157,20 @@ define i8 @mulv_v16i8(<16 x i8> %a) {
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: mul v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: mov b3, v0.b[3]
-; CHECK-GI-NEXT: mov b4, v0.b[4]
-; CHECK-GI-NEXT: mov b5, v0.b[5]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov b6, v0.b[6]
-; CHECK-GI-NEXT: mov b7, v0.b[7]
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: fmov w11, s3
-; CHECK-GI-NEXT: fmov w12, s5
+; CHECK-GI-NEXT: umov w8, v0.b[0]
+; CHECK-GI-NEXT: umov w9, v0.b[1]
+; CHECK-GI-NEXT: umov w10, v0.b[2]
+; CHECK-GI-NEXT: umov w11, v0.b[3]
+; CHECK-GI-NEXT: umov w12, v0.b[4]
+; CHECK-GI-NEXT: umov w13, v0.b[5]
+; CHECK-GI-NEXT: umov w14, v0.b[6]
+; CHECK-GI-NEXT: umov w15, v0.b[7]
+; CHECK-GI-NEXT: mul w8, w8, w9
+; CHECK-GI-NEXT: mul w9, w10, w11
+; CHECK-GI-NEXT: mul w10, w12, w13
+; CHECK-GI-NEXT: mul w11, w14, w15
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: fmov w9, s4
-; CHECK-GI-NEXT: mul w10, w10, w11
-; CHECK-GI-NEXT: fmov w11, s6
-; CHECK-GI-NEXT: mul w9, w9, w12
-; CHECK-GI-NEXT: fmov w12, s7
-; CHECK-GI-NEXT: mul w8, w8, w10
-; CHECK-GI-NEXT: mul w11, w11, w12
-; CHECK-GI-NEXT: mul w9, w9, w11
+; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
; CHECK-GI-NEXT: ret
entry:
@@ -225,27 +208,20 @@ define i8 @mulv_v32i8(<32 x i8> %a) {
; CHECK-GI-NEXT: mul v0.8b, v0.8b, v2.8b
; CHECK-GI-NEXT: mul v1.8b, v1.8b, v3.8b
; CHECK-GI-NEXT: mul v0.8b, v0.8b, v1.8b
-; CHECK-GI-NEXT: mov b1, v0.b[1]
-; CHECK-GI-NEXT: mov b2, v0.b[2]
-; CHECK-GI-NEXT: mov b3, v0.b[3]
-; CHECK-GI-NEXT: mov b4, v0.b[4]
-; CHECK-GI-NEXT: mov b5, v0.b[5]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: mov b6, v0.b[6]
-; CHECK-GI-NEXT: mov b7, v0.b[7]
-; CHECK-GI-NEXT: fmov w9, s1
-; CHECK-GI-NEXT: fmov w10, s2
-; CHECK-GI-NEXT: fmov w11, s3
-; CHECK-GI-NEXT: fmov w12, s5
+; CHECK-GI-NEXT: umov w8, v0.b[0]
+; CHECK-GI-NEXT: umov w9, v0.b[1]
+; CHECK-GI-NEXT: umov w10, v0.b[2]
+; CHECK-GI-NEXT: umov w11, v0.b[3]
+; CHECK-GI-NEXT: umov w12, v0.b[4]
+; CHECK-GI-NEXT: umov w13, v0.b[5]
+; CHECK-GI-NEXT: umov w14, v0.b[6]
+; CHECK-GI-NEXT: umov w15, v0.b[7]
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: fmov w9, s4
-; CHECK-GI-NEXT: mul w10, w10, w11
-; CHECK-GI-NEXT: fmov w11, s6
-; CHECK-GI-NEXT: mul w9, w9, w12
-; CHECK-GI-NEXT: fmov w12, s7
-; CHECK-GI-NEXT: mul w8, w8, w10
-; CHECK-GI-NEXT: mul w11, w11, w12
-; CHECK-GI-NEXT: mul w9, w9, w11
+; CHECK-GI-NEXT: mul w9, w10, w11
+; CHECK-GI-NEXT: mul w10, w12, w13
+; CHECK-GI-NEXT: mul w11, w14, w15
+; CHECK-GI-NEXT: mul w8, w8, w9
+; CHECK-GI-NEXT: mul w9, w10, w11
; CHECK-GI-NEXT: mul w0, w8, w9
; CHECK-GI-NEXT: ret
entry:
@@ -289,13 +265,11 @@ define i16 @mulv_v3i16(<3 x i16> %a) {
; CHECK-GI-LABEL: mulv_v3i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h1, v0.h[1]
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov w8, s0
-; CHECK-GI-NEXT: fmov w9, s1
+; CHECK-GI-NEXT: umov w8, v0.h[0]
+; CHECK-GI-NEXT: umov w9, v0.h[1]
+; CHECK-GI-NEXT: umov w10, v0.h[2]
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: fmov w9, s2
-; CHECK-GI-NEXT: mul w0, w8, w9
+; CHECK-GI-NEXT: mul w0, w8, ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/75662
More information about the llvm-commits mailing list