[llvm] [AArch64][GlobalISel] Refactor Combine G_CONCAT_VECTOR (PR #80866)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 6 08:22:51 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-globalisel
Author: None (chuongg3)
<details>
<summary>Changes</summary>
The combine is now implemented as a tablegen combine rule and checks that the new instruction is legal before creating it.
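Concretely, the hand-rolled `tryCombineConcatVectors` entry point (and the per-target `G_CONCAT_VECTORS` switch cases) is replaced by a tablegen `GICombineRule` with a `std::pair<bool, SmallVector<Register>>` match-data slot. The rule, as defined in the `Combine.td` hunk in the diff below, is:

```tablegen
// Declare the match data carried from the match step to the apply step:
// the bool records the "all operands undef" case, the SmallVector holds
// the registers of the flattened build_vector.
def concat_matchinfo : GIDefMatchData<"std::pair<bool, SmallVector<Register>>">;
def combine_concat_vector : GICombineRule<
  (defs root:$root, concat_matchinfo:$matchinfo),
  (match (wip_match_opcode G_CONCAT_VECTORS):$root,
        [{ return Helper.matchCombineConcatVectors(*${root}, ${matchinfo}); }]),
  (apply [{ Helper.applyCombineConcatVectors(*${root}, ${matchinfo}); }])>;
```

On the match side, `matchCombineConcatVectors` now also calls `isLegalOrBeforeLegalizer` on the flattened `G_BUILD_VECTOR` and rejects the combine if that instruction would be illegal, as shown in the `CombinerHelper.cpp` hunk below.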
---
Patch is 56.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/80866.diff
10 Files Affected:
- (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+12-12)
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+9-1)
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+20-20)
- (modified) llvm/lib/Target/AArch64/AArch64Combine.td (+2-2)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp (-2)
- (modified) llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp (-2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp (-2)
- (modified) llvm/test/CodeGen/AArch64/itofp.ll (+286-284)
- (modified) llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll (+4-10)
- (modified) llvm/test/CodeGen/AArch64/vecreduce-add.ll (+176-176)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 90428a622b417..372260e017c88 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -224,22 +224,22 @@ class CombinerHelper {
/// - concat_vector(undef, undef) => undef
/// - concat_vector(build_vector(A, B), build_vector(C, D)) =>
/// build_vector(A, B, C, D)
- ///
- /// \pre MI.getOpcode() == G_CONCAT_VECTORS.
- bool tryCombineConcatVectors(MachineInstr &MI);
+ /// ==========================================================
/// Check if the G_CONCAT_VECTORS \p MI is undef or if it
/// can be flattened into a build_vector.
- /// In the first case \p IsUndef will be true.
- /// In the second case \p Ops will contain the operands needed
- /// to produce the flattened build_vector.
+ /// In the first case \p bool will be true.
+ /// In the second case \p SmallVector<Register> will contain the operands
+ /// needed to produce the flattened build_vector.
///
/// \pre MI.getOpcode() == G_CONCAT_VECTORS.
- bool matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
- SmallVectorImpl<Register> &Ops);
- /// Replace \p MI with a flattened build_vector with \p Ops or an
- /// implicit_def if IsUndef is true.
- void applyCombineConcatVectors(MachineInstr &MI, bool IsUndef,
- const ArrayRef<Register> Ops);
+ bool
+ matchCombineConcatVectors(MachineInstr &MI,
+ std::pair<bool, SmallVector<Register>> &matchinfo);
+ /// Replace \p MI with a flattened build_vector with \p SmallVector<Register>
+ /// or an implicit_def if \p bool is true.
+ void
+ applyCombineConcatVectors(MachineInstr &MI,
+ std::pair<bool, SmallVector<Register>> &matchinfo);
/// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
/// Returns true if MI changed.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 6bda80681432a..fa76164a322a7 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1241,6 +1241,14 @@ def match_selects : GICombineRule<
[{ return Helper.matchSelect(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+// Combines concat operations
+def concat_matchinfo : GIDefMatchData<"std::pair<bool, SmallVector<Register>>">;
+def combine_concat_vector : GICombineRule<
+ (defs root:$root, concat_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_CONCAT_VECTORS):$root,
+ [{ return Helper.matchCombineConcatVectors(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyCombineConcatVectors(*${root}, ${matchinfo}); }])>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -1314,7 +1322,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
intdiv_combines, mulh_combines, redundant_neg_operands,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
- fsub_to_fneg, commute_constant_to_rhs]>;
+ fsub_to_fneg, commute_constant_to_rhs, combine_concat_vector]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 772229215e798..808b8939a38c7 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -220,21 +220,11 @@ void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
replaceRegWith(MRI, DstReg, SrcReg);
}
-bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
- bool IsUndef = false;
- SmallVector<Register, 4> Ops;
- if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
- applyCombineConcatVectors(MI, IsUndef, Ops);
- return true;
- }
- return false;
-}
-
-bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
- SmallVectorImpl<Register> &Ops) {
+bool CombinerHelper::matchCombineConcatVectors(
+ MachineInstr &MI, std::pair<bool, SmallVector<Register>> &matchinfo) {
assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
"Invalid instruction");
- IsUndef = true;
+ matchinfo.first = true;
MachineInstr *Undef = nullptr;
// Walk over all the operands of concat vectors and check if they are
@@ -244,13 +234,15 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
Register Reg = MO.getReg();
MachineInstr *Def = MRI.getVRegDef(Reg);
assert(Def && "Operand not defined");
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return false;
switch (Def->getOpcode()) {
case TargetOpcode::G_BUILD_VECTOR:
- IsUndef = false;
+ matchinfo.first = false;
// Remember the operands of the build_vector to fold
// them into the yet-to-build flattened concat vectors.
for (const MachineOperand &BuildVecMO : Def->uses())
- Ops.push_back(BuildVecMO.getReg());
+ matchinfo.second.push_back(BuildVecMO.getReg());
break;
case TargetOpcode::G_IMPLICIT_DEF: {
LLT OpType = MRI.getType(Reg);
@@ -266,17 +258,25 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
// for the flattening.
for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
EltIdx != EltEnd; ++EltIdx)
- Ops.push_back(Undef->getOperand(0).getReg());
+ matchinfo.second.push_back(Undef->getOperand(0).getReg());
break;
}
default:
return false;
}
}
+
+ // Check if the combine is illegal
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_BUILD_VECTOR,
+ {DstTy, MRI.getType(matchinfo.second[0])}})) {
+ return false;
+ }
+
return true;
}
void CombinerHelper::applyCombineConcatVectors(
- MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
+ MachineInstr &MI, std::pair<bool, SmallVector<Register>> &matchinfo) {
// We determined that the concat_vectors can be flatten.
// Generate the flattened build_vector.
Register DstReg = MI.getOperand(0).getReg();
@@ -287,12 +287,12 @@ void CombinerHelper::applyCombineConcatVectors(
// checking that at all Ops are undef. Alternatively, we could have
// generate a build_vector of undefs and rely on another combine to
// clean that up. For now, given we already gather this information
- // in tryCombineConcatVectors, just save compile time and issue the
+ // in matchCombineConcatVectors, just save compile time and issue the
// right thing.
- if (IsUndef)
+ if (matchinfo.first)
Builder.buildUndef(NewDstReg);
else
- Builder.buildBuildVector(NewDstReg, Ops);
+ Builder.buildBuildVector(NewDstReg, matchinfo.second);
MI.eraseFromParent();
replaceRegWith(MRI, DstReg, NewDstReg);
}
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 1daa7d5fe6a7a..e4d8359c71e62 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -64,7 +64,7 @@ def AArch64PreLegalizerCombiner: GICombiner<
}
def AArch64O0PreLegalizerCombiner: GICombiner<
- "AArch64O0PreLegalizerCombinerImpl", [optnone_combines]> {
+ "AArch64O0PreLegalizerCombinerImpl", [optnone_combines, combine_concat_vector]> {
let CombineAllMethodName = "tryCombineAllImpl";
}
@@ -288,5 +288,5 @@ def AArch64PostLegalizerCombiner
constant_fold_binops, identity_combines,
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
- select_to_minmax, or_to_bsp]> {
+ select_to_minmax, or_to_bsp, combine_concat_vector]> {
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
index 0b82ed1280ddd..17dd8f2314a2b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
@@ -91,8 +91,6 @@ bool AArch64O0PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
switch (Opc) {
- case TargetOpcode::G_CONCAT_VECTORS:
- return Helper.tryCombineConcatVectors(MI);
case TargetOpcode::G_SHUFFLE_VECTOR:
return Helper.tryCombineShuffleVector(MI);
case TargetOpcode::G_MEMCPY_INLINE:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 574d065ab01bb..a82d3cd095659 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -720,8 +720,6 @@ bool AArch64PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
switch (Opc) {
- case TargetOpcode::G_CONCAT_VECTORS:
- return Helper.tryCombineConcatVectors(MI);
case TargetOpcode::G_SHUFFLE_VECTOR:
return Helper.tryCombineShuffleVector(MI);
case TargetOpcode::G_UADDO:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index 0c7e198810da7..f14d970f1e5de 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -106,8 +106,6 @@ bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
return true;
switch (MI.getOpcode()) {
- case TargetOpcode::G_CONCAT_VECTORS:
- return Helper.tryCombineConcatVectors(MI);
case TargetOpcode::G_SHUFFLE_VECTOR:
return Helper.tryCombineShuffleVector(MI);
}
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index fa1ab61a6216f..0965d82f707e6 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -4043,28 +4043,28 @@ define <8 x half> @stofp_v8i64_v8f16(<8 x i64> %a) {
; CHECK-GI-FP16-LABEL: stofp_v8i64_v8f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: scvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT: scvtf v2.2d, v2.2d
; CHECK-GI-FP16-NEXT: scvtf v1.2d, v1.2d
+; CHECK-GI-FP16-NEXT: scvtf v2.2d, v2.2d
; CHECK-GI-FP16-NEXT: scvtf v3.2d, v3.2d
; CHECK-GI-FP16-NEXT: mov d4, v0.d[1]
-; CHECK-GI-FP16-NEXT: mov d5, v2.d[1]
; CHECK-GI-FP16-NEXT: fcvt h0, d0
-; CHECK-GI-FP16-NEXT: fcvt h2, d2
-; CHECK-GI-FP16-NEXT: fcvt h4, d4
-; CHECK-GI-FP16-NEXT: fcvt h5, d5
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov d4, v1.d[1]
+; CHECK-GI-FP16-NEXT: mov d5, v1.d[1]
; CHECK-GI-FP16-NEXT: fcvt h1, d1
-; CHECK-GI-FP16-NEXT: mov v2.h[1], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov d5, v3.d[1]
-; CHECK-GI-FP16-NEXT: fcvt h3, d3
; CHECK-GI-FP16-NEXT: fcvt h4, d4
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v4.h[0]
+; CHECK-GI-FP16-NEXT: fcvt h4, d5
; CHECK-GI-FP16-NEXT: mov v0.h[2], v1.h[0]
-; CHECK-GI-FP16-NEXT: fcvt h1, d5
-; CHECK-GI-FP16-NEXT: mov v2.h[2], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov d1, v2.d[1]
+; CHECK-GI-FP16-NEXT: fcvt h2, d2
; CHECK-GI-FP16-NEXT: mov v0.h[3], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v2.h[3], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0]
+; CHECK-GI-FP16-NEXT: fcvt h1, d1
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov d2, v3.d[1]
+; CHECK-GI-FP16-NEXT: fcvt h3, d3
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v1.h[0]
+; CHECK-GI-FP16-NEXT: fcvt h1, d2
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v1.h[0]
; CHECK-GI-FP16-NEXT: ret
entry:
%c = sitofp <8 x i64> %a to <8 x half>
@@ -4103,28 +4103,28 @@ define <8 x half> @utofp_v8i64_v8f16(<8 x i64> %a) {
; CHECK-GI-FP16-LABEL: utofp_v8i64_v8f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT: ucvtf v2.2d, v2.2d
; CHECK-GI-FP16-NEXT: ucvtf v1.2d, v1.2d
+; CHECK-GI-FP16-NEXT: ucvtf v2.2d, v2.2d
; CHECK-GI-FP16-NEXT: ucvtf v3.2d, v3.2d
; CHECK-GI-FP16-NEXT: mov d4, v0.d[1]
-; CHECK-GI-FP16-NEXT: mov d5, v2.d[1]
; CHECK-GI-FP16-NEXT: fcvt h0, d0
-; CHECK-GI-FP16-NEXT: fcvt h2, d2
-; CHECK-GI-FP16-NEXT: fcvt h4, d4
-; CHECK-GI-FP16-NEXT: fcvt h5, d5
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov d4, v1.d[1]
+; CHECK-GI-FP16-NEXT: mov d5, v1.d[1]
; CHECK-GI-FP16-NEXT: fcvt h1, d1
-; CHECK-GI-FP16-NEXT: mov v2.h[1], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov d5, v3.d[1]
-; CHECK-GI-FP16-NEXT: fcvt h3, d3
; CHECK-GI-FP16-NEXT: fcvt h4, d4
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v4.h[0]
+; CHECK-GI-FP16-NEXT: fcvt h4, d5
; CHECK-GI-FP16-NEXT: mov v0.h[2], v1.h[0]
-; CHECK-GI-FP16-NEXT: fcvt h1, d5
-; CHECK-GI-FP16-NEXT: mov v2.h[2], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov d1, v2.d[1]
+; CHECK-GI-FP16-NEXT: fcvt h2, d2
; CHECK-GI-FP16-NEXT: mov v0.h[3], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v2.h[3], v1.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.d[1], v2.d[0]
+; CHECK-GI-FP16-NEXT: fcvt h1, d1
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov d2, v3.d[1]
+; CHECK-GI-FP16-NEXT: fcvt h3, d3
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v1.h[0]
+; CHECK-GI-FP16-NEXT: fcvt h1, d2
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v1.h[0]
; CHECK-GI-FP16-NEXT: ret
entry:
%c = uitofp <8 x i64> %a to <8 x half>
@@ -4183,51 +4183,51 @@ define <16 x half> @stofp_v16i64_v16f16(<16 x i64> %a) {
; CHECK-GI-FP16-LABEL: stofp_v16i64_v16f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: scvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT: scvtf v16.2d, v2.2d
; CHECK-GI-FP16-NEXT: scvtf v4.2d, v4.2d
-; CHECK-GI-FP16-NEXT: scvtf v2.2d, v6.2d
-; CHECK-GI-FP16-NEXT: scvtf v20.2d, v1.2d
-; CHECK-GI-FP16-NEXT: scvtf v3.2d, v3.2d
+; CHECK-GI-FP16-NEXT: scvtf v18.2d, v1.2d
; CHECK-GI-FP16-NEXT: scvtf v5.2d, v5.2d
-; CHECK-GI-FP16-NEXT: scvtf v7.2d, v7.2d
-; CHECK-GI-FP16-NEXT: mov d6, v0.d[1]
-; CHECK-GI-FP16-NEXT: mov d17, v16.d[1]
-; CHECK-GI-FP16-NEXT: mov d18, v4.d[1]
-; CHECK-GI-FP16-NEXT: mov d19, v2.d[1]
+; CHECK-GI-FP16-NEXT: scvtf v2.2d, v2.2d
+; CHECK-GI-FP16-NEXT: scvtf v3.2d, v3.2d
+; CHECK-GI-FP16-NEXT: mov d16, v0.d[1]
+; CHECK-GI-FP16-NEXT: mov d17, v4.d[1]
; CHECK-GI-FP16-NEXT: fcvt h0, d0
-; CHECK-GI-FP16-NEXT: fcvt h16, d16
; CHECK-GI-FP16-NEXT: fcvt h1, d4
-; CHECK-GI-FP16-NEXT: fcvt h2, d2
-; CHECK-GI-FP16-NEXT: fcvt h6, d6
-; CHECK-GI-FP16-NEXT: fcvt h17, d17
-; CHECK-GI-FP16-NEXT: fcvt h4, d18
-; CHECK-GI-FP16-NEXT: fcvt h18, d19
-; CHECK-GI-FP16-NEXT: fcvt h19, d20
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov d6, v20.d[1]
-; CHECK-GI-FP16-NEXT: mov v16.h[1], v17.h[0]
-; CHECK-GI-FP16-NEXT: mov d17, v3.d[1]
-; CHECK-GI-FP16-NEXT: fcvt h3, d3
-; CHECK-GI-FP16-NEXT: mov v1.h[1], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov d4, v5.d[1]
+; CHECK-GI-FP16-NEXT: mov d19, v5.d[1]
; CHECK-GI-FP16-NEXT: fcvt h5, d5
-; CHECK-GI-FP16-NEXT: mov v2.h[1], v18.h[0]
-; CHECK-GI-FP16-NEXT: mov d18, v7.d[1]
-; CHECK-GI-FP16-NEXT: fcvt h7, d7
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v19.h[0]
-; CHECK-GI-FP16-NEXT: mov v16.h[2], v3.h[0]
-; CHECK-GI-FP16-NEXT: fcvt h3, d6
+; CHECK-GI-FP16-NEXT: fcvt h16, d16
+; CHECK-GI-FP16-NEXT: fcvt h4, d17
+; CHECK-GI-FP16-NEXT: mov d17, v18.d[1]
+; CHECK-GI-FP16-NEXT: fcvt h18, d18
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v16.h[0]
+; CHECK-GI-FP16-NEXT: mov v1.h[1], v4.h[0]
+; CHECK-GI-FP16-NEXT: scvtf v4.2d, v6.2d
; CHECK-GI-FP16-NEXT: fcvt h6, d17
-; CHECK-GI-FP16-NEXT: fcvt h4, d4
+; CHECK-GI-FP16-NEXT: fcvt h16, d19
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v18.h[0]
; CHECK-GI-FP16-NEXT: mov v1.h[2], v5.h[0]
-; CHECK-GI-FP16-NEXT: fcvt h5, d18
-; CHECK-GI-FP16-NEXT: mov v2.h[2], v7.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
-; CHECK-GI-FP16-NEXT: mov v16.h[3], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov v1.h[3], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov v2.h[3], v5.h[0]
-; CHECK-GI-FP16-NEXT: mov v0.d[1], v16.d[0]
-; CHECK-GI-FP16-NEXT: mov v1.d[1], v2.d[0]
+; CHECK-GI-FP16-NEXT: mov d5, v2.d[1]
+; CHECK-GI-FP16-NEXT: fcvt h2, d2
+; CHECK-GI-FP16-NEXT: mov d17, v4.d[1]
+; CHECK-GI-FP16-NEXT: fcvt h4, d4
+; CHECK-GI-FP16-NEXT: mov v0.h[3], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v1.h[3], v16.h[0]
+; CHECK-GI-FP16-NEXT: scvtf v6.2d, v7.2d
+; CHECK-GI-FP16-NEXT: fcvt h5, d5
+; CHECK-GI-FP16-NEXT: fcvt h7, d17
+; CHECK-GI-FP16-NEXT: mov v0.h[4], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v1.h[4], v4.h[0]
+; CHECK-GI-FP16-NEXT: mov d2, v3.d[1]
+; CHECK-GI-FP16-NEXT: fcvt h3, d3
+; CHECK-GI-FP16-NEXT: mov d4, v6.d[1]
+; CHECK-GI-FP16-NEXT: fcvt h6, d6
+; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
+; CHECK-GI-FP16-NEXT: mov v1.h[5], v7.h[0]
+; CHECK-GI-FP16-NEXT: fcvt h2, d2
+; CHECK-GI-FP16-NEXT: fcvt h4, d4
+; CHECK-GI-FP16-NEXT: mov v0.h[6], v3.h[0]
+; CHECK-GI-FP16-NEXT: mov v1.h[6], v6.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.h[7], v2.h[0]
+; CHECK-GI-FP16-NEXT: mov v1.h[7], v4.h[0]
; CHECK-GI-FP16-NEXT: ret
entry:
%c = sitofp <16 x i64> %a to <16 x half>
@@ -4286,51 +4286,51 @@ define <16 x half> @utofp_v16i64_v16f16(<16 x i64> %a) {
; CHECK-GI-FP16-LABEL: utofp_v16i64_v16f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT: ucvtf v16.2d, v2.2d
; CHECK-GI-FP16-NEXT: ucvtf v4.2d, v4.2d
-; CHECK-GI-FP16-NEXT: ucvtf v2.2d, v6.2d
-; CHECK-GI-FP16-NEXT: ucvtf v20.2d, v1.2d
-; CHECK-GI-FP16-NEXT: ucvtf v3.2d, v3.2d
+; CHECK-GI-FP16-NEXT: ucvtf v18.2d, v1.2d
; CHECK-GI-FP16-NEXT: ucvtf v5.2d, v5.2d
-; CHECK-GI-FP16-NEXT: ucvtf v7.2d, v7.2d
-; CHECK-GI-FP16-NEXT: mov d6, v0.d[1]
-; CHECK-GI-FP16-NEXT: mov d17, v16.d[1]
-; CHECK-GI-FP16-NEXT: mov d18, v4.d[1]
-; CHECK-GI-FP16-NEXT: mov d19, v2.d[1]
+; CHECK-GI-FP16-NEXT: ucvtf v2.2d, v2.2d
+; CHECK-GI-FP16-NEXT: ucvtf v3.2d, v3.2d
+; CHECK-GI-FP16-NEXT: mov d16, v0.d[1]
+; CHECK-GI-FP16-NEXT: mov d17, v4.d[1]
; CHECK-GI-FP16-NEXT: fcvt h0, d0
-; CHECK-GI-FP16-NEXT: fcvt h16, d16
; CHECK-GI-FP16-NEXT: fcvt h1, d4
-; CHECK-GI-FP16-NEXT: fcvt h2, d2
-; CHECK-GI-FP16-NEXT: fcvt h6, d6
-; CHECK-GI-FP16-NEXT: fcvt h17, d17
-; CHECK-GI-FP16-NEXT: fcvt h4, d18
-; CHECK-GI-FP16-NEXT: fcvt h18, d19
-; CHECK-GI-FP16-NEXT: fcvt h19, d20
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v6.h[0]
-; CHECK-GI-FP16-NEXT: mov d6, v20.d[1]
-; CHECK-GI-FP16-NEXT: mov v16.h[1], v17.h[0]
-; CHECK-GI-FP16-NEXT: mov d17, v3.d[1]
-; CHECK-GI-FP16-NEXT: fcvt h3, d3
-; CHECK-GI-FP16-NEXT: mov v1.h[1], v4.h[0]
-; CHECK-GI-FP16-NEXT: mov d4, v5.d[1]
+; CHECK-GI-FP16-NEXT: mov d19, v5.d[1]
; CHECK-GI-FP16-NEXT: fcvt h5, d5
-; CHECK-GI-FP16-NEXT: mov v2.h[1], v18.h[0]
-; CHECK-GI-FP16-NEXT: mov d18, v7.d[1]
-; CHECK-GI-FP16-NEXT: fcvt h7, d7
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v19.h[0]
-; CHECK-GI-FP16-NEXT: mov v16.h[2], v3.h[0]
-; CHECK-GI-FP16-NEXT: fcvt h3, d6
+; CHECK-GI-FP16-NEXT: fcvt h16, d16
+; CHECK-GI-FP16-NEXT: fcvt h4, d17
+; CHECK-GI-FP16-NEXT: mov d17, v18.d[1]
+; CHECK-GI-FP16-NEXT: fcvt h18, d18
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v16.h[0]
+; CHECK-GI-FP16-NEXT: mov v1.h[1], v4.h[0]
+; CHECK-GI-FP16-NEXT: ucvtf v4.2d, v6.2d
; CHECK-GI-FP16-NEXT: fcvt h6, d17
-; CHECK-GI-FP16-NEXT: fcvt h4, d4
+; CHECK-GI-FP16-NEXT: fcvt h16, d19
+; CHECK-GI-FP16-NEXT: mov v0.h[2], v18.h[0]
; ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/80866