[llvm] ee4e819 - [LLVM][AArch64][SVE] Mark DUP immediate instructions with isAsCheapAsAMove. (#133945)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 3 03:42:11 PDT 2025
Author: Paul Walker
Date: 2025-04-03T11:42:07+01:00
New Revision: ee4e8197fa67dd1ed6e9470e00708e7feeaacd97
URL: https://github.com/llvm/llvm-project/commit/ee4e8197fa67dd1ed6e9470e00708e7feeaacd97
DIFF: https://github.com/llvm/llvm-project/commit/ee4e8197fa67dd1ed6e9470e00708e7feeaacd97.diff
LOG: [LLVM][AArch64][SVE] Mark DUP immediate instructions with isAsCheapAsAMove. (#133945)
Marking the SVE DUP (immediate) instructions as being as cheap as a move means
we'll rematerialize (regenerate) an immediate rather than copy the result of an
existing one, reducing instruction dependency chains.
Added:
Modified:
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll
llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll
llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul-scalable.ll
llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul-scalable.ll
llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul-scalable.ll
llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll
llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-mul-scalable.ll
llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-mul-scalable.ll
llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll
llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
llvm/test/CodeGen/AArch64/sve-pr92779.ll
llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 772c440685072..c56713783289e 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2113,6 +2113,7 @@ class sve_int_dup_mask_imm<string asm>
let DecoderMethod = "DecodeSVELogicalImmInstruction";
let hasSideEffects = 0;
+ let isAsCheapAsAMove = 1;
let isReMaterializable = 1;
let Uses = [VG];
}
@@ -5118,6 +5119,7 @@ class sve_int_dup_imm<bits<2> sz8_64, string asm,
let Inst{4-0} = Zd;
let hasSideEffects = 0;
+ let isAsCheapAsAMove = 1;
let isReMaterializable = 1;
let Uses = [VG];
}
@@ -5161,6 +5163,7 @@ class sve_int_dup_fpimm<bits<2> sz8_64, Operand fpimmtype,
let Inst{4-0} = Zd;
let hasSideEffects = 0;
+ let isAsCheapAsAMove = 1;
let isReMaterializable = 1;
let Uses = [VG];
}
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll
index 0485d530fd060..98f5b4c19a9b9 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll
@@ -51,20 +51,20 @@ define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4
; CHECK-LABEL: mul_add_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
+; CHECK-NEXT: mov z25.d, #0 // =0x0
+; CHECK-NEXT: mov z26.d, #0 // =0x0
+; CHECK-NEXT: mov z27.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: mov z26.d, z24.d
-; CHECK-NEXT: mov z27.d, z24.d
-; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
-; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #0
-; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #0
+; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #0
+; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #0
-; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
-; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #90
-; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #90
+; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #0
+; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #90
+; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #90
-; CHECK-NEXT: fadd z1.d, z26.d, z24.d
-; CHECK-NEXT: fadd z0.d, z25.d, z27.d
+; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #90
+; CHECK-NEXT: fadd z0.d, z24.d, z27.d
+; CHECK-NEXT: fadd z1.d, z25.d, z26.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -102,20 +102,20 @@ define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4
; CHECK-LABEL: mul_sub_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
+; CHECK-NEXT: mov z25.d, #0 // =0x0
+; CHECK-NEXT: mov z26.d, #0 // =0x0
+; CHECK-NEXT: mov z27.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: mov z26.d, z24.d
-; CHECK-NEXT: mov z27.d, z24.d
-; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
-; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #0
-; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #0
+; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #0
+; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #0
-; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
-; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #90
-; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #90
+; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #0
+; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #90
+; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #90
-; CHECK-NEXT: fsub z1.d, z26.d, z24.d
-; CHECK-NEXT: fsub z0.d, z25.d, z27.d
+; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #90
+; CHECK-NEXT: fsub z0.d, z24.d, z27.d
+; CHECK-NEXT: fsub z1.d, z25.d, z26.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -153,20 +153,20 @@ define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x
; CHECK-LABEL: mul_conj_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
+; CHECK-NEXT: mov z25.d, #0 // =0x0
+; CHECK-NEXT: mov z26.d, #0 // =0x0
+; CHECK-NEXT: mov z27.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: mov z26.d, z24.d
-; CHECK-NEXT: mov z27.d, z24.d
-; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
-; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #0
-; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #0
+; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #0
+; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z27.d, p0/m, z4.d, z6.d, #0
-; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
-; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #90
-; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #90
+; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z7.d, #0
+; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #90
+; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: fcmla z27.d, p0/m, z4.d, z6.d, #270
-; CHECK-NEXT: fadd z1.d, z26.d, z24.d
-; CHECK-NEXT: fadd z0.d, z25.d, z27.d
+; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z7.d, #270
+; CHECK-NEXT: fadd z0.d, z24.d, z27.d
+; CHECK-NEXT: fadd z1.d, z25.d, z26.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll
index c643ae9265c0b..2fc91125bc0ac 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll
@@ -42,18 +42,18 @@ define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4
; CHECK-LABEL: mul_add_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
+; CHECK-NEXT: mov z25.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #0
-; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
+; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
-; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
+; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #90
-; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
+; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
-; CHECK-NEXT: mov z1.d, z24.d
+; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: mov z0.d, z25.d
+; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -91,18 +91,18 @@ define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4
; CHECK-LABEL: mul_sub_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
+; CHECK-NEXT: mov z25.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #270
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #270
-; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
+; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #270
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
-; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #180
+; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #180
-; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
+; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #180
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
-; CHECK-NEXT: mov z1.d, z24.d
+; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: mov z0.d, z25.d
+; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -140,18 +140,18 @@ define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x
; CHECK-LABEL: mul_conj_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
+; CHECK-NEXT: mov z25.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
-; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
+; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
-; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
+; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z6.d, #0
-; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
+; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z6.d, #270
-; CHECK-NEXT: mov z1.d, z24.d
+; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
; CHECK-NEXT: mov z0.d, z25.d
+; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul-scalable.ll
index b42d484ea74c9..80934d2cb98c2 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul-scalable.ll
@@ -73,14 +73,14 @@ define <vscale x 16 x half> @complex_mul_v16f16(<vscale x 16 x half> %a, <vscale
; CHECK-LABEL: complex_mul_v16f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z4.h, #0 // =0x0
+; CHECK-NEXT: mov z5.h, #0 // =0x0
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z5.d, z4.d
-; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #0
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #0
-; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #90
+; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #0
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #90
-; CHECK-NEXT: mov z1.d, z4.d
+; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #90
; CHECK-NEXT: mov z0.d, z5.d
+; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %a)
@@ -104,22 +104,22 @@ define <vscale x 32 x half> @complex_mul_v32f16(<vscale x 32 x half> %a, <vscale
; CHECK-LABEL: complex_mul_v32f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.h, #0 // =0x0
+; CHECK-NEXT: mov z25.h, #0 // =0x0
+; CHECK-NEXT: mov z26.h, #0 // =0x0
+; CHECK-NEXT: mov z27.h, #0 // =0x0
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: mov z26.d, z24.d
-; CHECK-NEXT: mov z27.d, z24.d
-; CHECK-NEXT: fcmla z24.h, p0/m, z7.h, z3.h, #0
-; CHECK-NEXT: fcmla z25.h, p0/m, z4.h, z0.h, #0
-; CHECK-NEXT: fcmla z26.h, p0/m, z5.h, z1.h, #0
+; CHECK-NEXT: fcmla z24.h, p0/m, z4.h, z0.h, #0
+; CHECK-NEXT: fcmla z25.h, p0/m, z5.h, z1.h, #0
; CHECK-NEXT: fcmla z27.h, p0/m, z6.h, z2.h, #0
-; CHECK-NEXT: fcmla z24.h, p0/m, z7.h, z3.h, #90
-; CHECK-NEXT: fcmla z25.h, p0/m, z4.h, z0.h, #90
-; CHECK-NEXT: fcmla z26.h, p0/m, z5.h, z1.h, #90
+; CHECK-NEXT: fcmla z26.h, p0/m, z7.h, z3.h, #0
+; CHECK-NEXT: fcmla z24.h, p0/m, z4.h, z0.h, #90
+; CHECK-NEXT: fcmla z25.h, p0/m, z5.h, z1.h, #90
; CHECK-NEXT: fcmla z27.h, p0/m, z6.h, z2.h, #90
-; CHECK-NEXT: mov z3.d, z24.d
-; CHECK-NEXT: mov z0.d, z25.d
-; CHECK-NEXT: mov z1.d, z26.d
+; CHECK-NEXT: fcmla z26.h, p0/m, z7.h, z3.h, #90
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z27.d
+; CHECK-NEXT: mov z3.d, z26.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 16 x half>, <vscale x 16 x half> } @llvm.vector.deinterleave2.nxv32f16(<vscale x 32 x half> %a)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul-scalable.ll
index bcd46aa182b55..874b5b538f1fd 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul-scalable.ll
@@ -35,14 +35,14 @@ define <vscale x 8 x float> @complex_mul_v8f32(<vscale x 8 x float> %a, <vscale
; CHECK-LABEL: complex_mul_v8f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z4.s, #0 // =0x0
+; CHECK-NEXT: mov z5.s, #0 // =0x0
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z5.d, z4.d
-; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #0
; CHECK-NEXT: fcmla z5.s, p0/m, z2.s, z0.s, #0
-; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #90
+; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #0
; CHECK-NEXT: fcmla z5.s, p0/m, z2.s, z0.s, #90
-; CHECK-NEXT: mov z1.d, z4.d
+; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #90
; CHECK-NEXT: mov z0.d, z5.d
+; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %a)
@@ -66,22 +66,22 @@ define <vscale x 16 x float> @complex_mul_v16f32(<vscale x 16 x float> %a, <vsca
; CHECK-LABEL: complex_mul_v16f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.s, #0 // =0x0
+; CHECK-NEXT: mov z25.s, #0 // =0x0
+; CHECK-NEXT: mov z26.s, #0 // =0x0
+; CHECK-NEXT: mov z27.s, #0 // =0x0
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: mov z26.d, z24.d
-; CHECK-NEXT: mov z27.d, z24.d
-; CHECK-NEXT: fcmla z24.s, p0/m, z7.s, z3.s, #0
-; CHECK-NEXT: fcmla z25.s, p0/m, z4.s, z0.s, #0
-; CHECK-NEXT: fcmla z26.s, p0/m, z5.s, z1.s, #0
+; CHECK-NEXT: fcmla z24.s, p0/m, z4.s, z0.s, #0
+; CHECK-NEXT: fcmla z25.s, p0/m, z5.s, z1.s, #0
; CHECK-NEXT: fcmla z27.s, p0/m, z6.s, z2.s, #0
-; CHECK-NEXT: fcmla z24.s, p0/m, z7.s, z3.s, #90
-; CHECK-NEXT: fcmla z25.s, p0/m, z4.s, z0.s, #90
-; CHECK-NEXT: fcmla z26.s, p0/m, z5.s, z1.s, #90
+; CHECK-NEXT: fcmla z26.s, p0/m, z7.s, z3.s, #0
+; CHECK-NEXT: fcmla z24.s, p0/m, z4.s, z0.s, #90
+; CHECK-NEXT: fcmla z25.s, p0/m, z5.s, z1.s, #90
; CHECK-NEXT: fcmla z27.s, p0/m, z6.s, z2.s, #90
-; CHECK-NEXT: mov z3.d, z24.d
-; CHECK-NEXT: mov z0.d, z25.d
-; CHECK-NEXT: mov z1.d, z26.d
+; CHECK-NEXT: fcmla z26.s, p0/m, z7.s, z3.s, #90
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z27.d
+; CHECK-NEXT: mov z3.d, z26.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.vector.deinterleave2.nxv16f32(<vscale x 16 x float> %a)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul-scalable.ll
index db28fa3997cb3..c9a092f52f159 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul-scalable.ll
@@ -35,14 +35,14 @@ define <vscale x 4 x double> @complex_mul_v4f64(<vscale x 4 x double> %a, <vscal
; CHECK-LABEL: complex_mul_v4f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z4.d, #0 // =0x0
+; CHECK-NEXT: mov z5.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z5.d, z4.d
-; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z5.d, p0/m, z2.d, z0.d, #0
-; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #90
+; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z5.d, p0/m, z2.d, z0.d, #90
-; CHECK-NEXT: mov z1.d, z4.d
+; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: mov z0.d, z5.d
+; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -66,22 +66,22 @@ define <vscale x 8 x double> @complex_mul_v8f64(<vscale x 8 x double> %a, <vscal
; CHECK-LABEL: complex_mul_v8f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
+; CHECK-NEXT: mov z25.d, #0 // =0x0
+; CHECK-NEXT: mov z26.d, #0 // =0x0
+; CHECK-NEXT: mov z27.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: mov z26.d, z24.d
-; CHECK-NEXT: mov z27.d, z24.d
-; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z3.d, #0
-; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z0.d, #0
-; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z1.d, #0
+; CHECK-NEXT: fcmla z24.d, p0/m, z4.d, z0.d, #0
+; CHECK-NEXT: fcmla z25.d, p0/m, z5.d, z1.d, #0
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z2.d, #0
-; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z3.d, #90
-; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z0.d, #90
-; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z1.d, #90
+; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z3.d, #0
+; CHECK-NEXT: fcmla z24.d, p0/m, z4.d, z0.d, #90
+; CHECK-NEXT: fcmla z25.d, p0/m, z5.d, z1.d, #90
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z2.d, #90
-; CHECK-NEXT: mov z3.d, z24.d
-; CHECK-NEXT: mov z0.d, z25.d
-; CHECK-NEXT: mov z1.d, z26.d
+; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z3.d, #90
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z27.d
+; CHECK-NEXT: mov z3.d, z26.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 4 x double>, <vscale x 4 x double> } @llvm.vector.deinterleave2.nxv8f64(<vscale x 8 x double> %a)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll
index b4cb548f63088..58a0809ee093f 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll
@@ -72,13 +72,13 @@ define <vscale x 16 x i16> @complex_mul_v16i16(<vscale x 16 x i16> %a, <vscale x
; CHECK-LABEL: complex_mul_v16i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z4.h, #0 // =0x0
-; CHECK-NEXT: mov z5.d, z4.d
-; CHECK-NEXT: cmla z4.h, z3.h, z1.h, #0
+; CHECK-NEXT: mov z5.h, #0 // =0x0
; CHECK-NEXT: cmla z5.h, z2.h, z0.h, #0
-; CHECK-NEXT: cmla z4.h, z3.h, z1.h, #90
+; CHECK-NEXT: cmla z4.h, z3.h, z1.h, #0
; CHECK-NEXT: cmla z5.h, z2.h, z0.h, #90
-; CHECK-NEXT: mov z1.d, z4.d
+; CHECK-NEXT: cmla z4.h, z3.h, z1.h, #90
; CHECK-NEXT: mov z0.d, z5.d
+; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %a)
@@ -102,21 +102,21 @@ define <vscale x 32 x i16> @complex_mul_v32i16(<vscale x 32 x i16> %a, <vscale x
; CHECK-LABEL: complex_mul_v32i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.h, #0 // =0x0
-; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: mov z26.d, z24.d
-; CHECK-NEXT: mov z27.d, z24.d
-; CHECK-NEXT: cmla z24.h, z7.h, z3.h, #0
-; CHECK-NEXT: cmla z25.h, z4.h, z0.h, #0
-; CHECK-NEXT: cmla z26.h, z5.h, z1.h, #0
+; CHECK-NEXT: mov z25.h, #0 // =0x0
+; CHECK-NEXT: mov z26.h, #0 // =0x0
+; CHECK-NEXT: mov z27.h, #0 // =0x0
+; CHECK-NEXT: cmla z24.h, z4.h, z0.h, #0
+; CHECK-NEXT: cmla z25.h, z5.h, z1.h, #0
; CHECK-NEXT: cmla z27.h, z6.h, z2.h, #0
-; CHECK-NEXT: cmla z24.h, z7.h, z3.h, #90
-; CHECK-NEXT: cmla z25.h, z4.h, z0.h, #90
-; CHECK-NEXT: cmla z26.h, z5.h, z1.h, #90
+; CHECK-NEXT: cmla z26.h, z7.h, z3.h, #0
+; CHECK-NEXT: cmla z24.h, z4.h, z0.h, #90
+; CHECK-NEXT: cmla z25.h, z5.h, z1.h, #90
; CHECK-NEXT: cmla z27.h, z6.h, z2.h, #90
-; CHECK-NEXT: mov z3.d, z24.d
-; CHECK-NEXT: mov z0.d, z25.d
-; CHECK-NEXT: mov z1.d, z26.d
+; CHECK-NEXT: cmla z26.h, z7.h, z3.h, #90
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z27.d
+; CHECK-NEXT: mov z3.d, z26.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 16 x i16>, <vscale x 16 x i16> } @llvm.vector.deinterleave2.nxv32i16(<vscale x 32 x i16> %a)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-mul-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-mul-scalable.ll
index 4cfe4707b9a96..0958c60ed7cb0 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-mul-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-mul-scalable.ll
@@ -34,13 +34,13 @@ define <vscale x 8 x i32> @complex_mul_v8i32(<vscale x 8 x i32> %a, <vscale x 8
; CHECK-LABEL: complex_mul_v8i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z4.s, #0 // =0x0
-; CHECK-NEXT: mov z5.d, z4.d
-; CHECK-NEXT: cmla z4.s, z3.s, z1.s, #0
+; CHECK-NEXT: mov z5.s, #0 // =0x0
; CHECK-NEXT: cmla z5.s, z2.s, z0.s, #0
-; CHECK-NEXT: cmla z4.s, z3.s, z1.s, #90
+; CHECK-NEXT: cmla z4.s, z3.s, z1.s, #0
; CHECK-NEXT: cmla z5.s, z2.s, z0.s, #90
-; CHECK-NEXT: mov z1.d, z4.d
+; CHECK-NEXT: cmla z4.s, z3.s, z1.s, #90
; CHECK-NEXT: mov z0.d, z5.d
+; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %a)
@@ -64,21 +64,21 @@ define <vscale x 16 x i32> @complex_mul_v16i32(<vscale x 16 x i32> %a, <vscale x
; CHECK-LABEL: complex_mul_v16i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.s, #0 // =0x0
-; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: mov z26.d, z24.d
-; CHECK-NEXT: mov z27.d, z24.d
-; CHECK-NEXT: cmla z24.s, z7.s, z3.s, #0
-; CHECK-NEXT: cmla z25.s, z4.s, z0.s, #0
-; CHECK-NEXT: cmla z26.s, z5.s, z1.s, #0
+; CHECK-NEXT: mov z25.s, #0 // =0x0
+; CHECK-NEXT: mov z26.s, #0 // =0x0
+; CHECK-NEXT: mov z27.s, #0 // =0x0
+; CHECK-NEXT: cmla z24.s, z4.s, z0.s, #0
+; CHECK-NEXT: cmla z25.s, z5.s, z1.s, #0
; CHECK-NEXT: cmla z27.s, z6.s, z2.s, #0
-; CHECK-NEXT: cmla z24.s, z7.s, z3.s, #90
-; CHECK-NEXT: cmla z25.s, z4.s, z0.s, #90
-; CHECK-NEXT: cmla z26.s, z5.s, z1.s, #90
+; CHECK-NEXT: cmla z26.s, z7.s, z3.s, #0
+; CHECK-NEXT: cmla z24.s, z4.s, z0.s, #90
+; CHECK-NEXT: cmla z25.s, z5.s, z1.s, #90
; CHECK-NEXT: cmla z27.s, z6.s, z2.s, #90
-; CHECK-NEXT: mov z3.d, z24.d
-; CHECK-NEXT: mov z0.d, z25.d
-; CHECK-NEXT: mov z1.d, z26.d
+; CHECK-NEXT: cmla z26.s, z7.s, z3.s, #90
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z27.d
+; CHECK-NEXT: mov z3.d, z26.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> %a)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-mul-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-mul-scalable.ll
index 5975f3b491d48..30c06838c81bc 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-mul-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-mul-scalable.ll
@@ -34,13 +34,13 @@ define <vscale x 4 x i64> @complex_mul_v4i64(<vscale x 4 x i64> %a, <vscale x 4
; CHECK-LABEL: complex_mul_v4i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z4.d, #0 // =0x0
-; CHECK-NEXT: mov z5.d, z4.d
-; CHECK-NEXT: cmla z4.d, z3.d, z1.d, #0
+; CHECK-NEXT: mov z5.d, #0 // =0x0
; CHECK-NEXT: cmla z5.d, z2.d, z0.d, #0
-; CHECK-NEXT: cmla z4.d, z3.d, z1.d, #90
+; CHECK-NEXT: cmla z4.d, z3.d, z1.d, #0
; CHECK-NEXT: cmla z5.d, z2.d, z0.d, #90
-; CHECK-NEXT: mov z1.d, z4.d
+; CHECK-NEXT: cmla z4.d, z3.d, z1.d, #90
; CHECK-NEXT: mov z0.d, z5.d
+; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %a)
@@ -64,21 +64,21 @@ define <vscale x 8 x i64> @complex_mul_v8i64(<vscale x 8 x i64> %a, <vscale x 8
; CHECK-LABEL: complex_mul_v8i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
-; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: mov z26.d, z24.d
-; CHECK-NEXT: mov z27.d, z24.d
-; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #0
-; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #0
-; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #0
+; CHECK-NEXT: mov z25.d, #0 // =0x0
+; CHECK-NEXT: mov z26.d, #0 // =0x0
+; CHECK-NEXT: mov z27.d, #0 // =0x0
+; CHECK-NEXT: cmla z24.d, z4.d, z0.d, #0
+; CHECK-NEXT: cmla z25.d, z5.d, z1.d, #0
; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #0
-; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #90
-; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #90
-; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #90
+; CHECK-NEXT: cmla z26.d, z7.d, z3.d, #0
+; CHECK-NEXT: cmla z24.d, z4.d, z0.d, #90
+; CHECK-NEXT: cmla z25.d, z5.d, z1.d, #90
; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #90
-; CHECK-NEXT: mov z3.d, z24.d
-; CHECK-NEXT: mov z0.d, z25.d
-; CHECK-NEXT: mov z1.d, z26.d
+; CHECK-NEXT: cmla z26.d, z7.d, z3.d, #90
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z27.d
+; CHECK-NEXT: mov z3.d, z26.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %a)
@@ -102,21 +102,21 @@ define <vscale x 8 x i64> @complex_minus_mul_v8i64(<vscale x 8 x i64> %a, <vscal
; CHECK-LABEL: complex_minus_mul_v8i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z24.d, #0 // =0x0
-; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: mov z26.d, z24.d
-; CHECK-NEXT: mov z27.d, z24.d
-; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #270
-; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #270
-; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #270
+; CHECK-NEXT: mov z25.d, #0 // =0x0
+; CHECK-NEXT: mov z26.d, #0 // =0x0
+; CHECK-NEXT: mov z27.d, #0 // =0x0
+; CHECK-NEXT: cmla z24.d, z4.d, z0.d, #270
+; CHECK-NEXT: cmla z25.d, z5.d, z1.d, #270
; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #270
-; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #180
-; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #180
-; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #180
+; CHECK-NEXT: cmla z26.d, z7.d, z3.d, #270
+; CHECK-NEXT: cmla z24.d, z4.d, z0.d, #180
+; CHECK-NEXT: cmla z25.d, z5.d, z1.d, #180
; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #180
-; CHECK-NEXT: mov z3.d, z24.d
-; CHECK-NEXT: mov z0.d, z25.d
-; CHECK-NEXT: mov z1.d, z26.d
+; CHECK-NEXT: cmla z26.d, z7.d, z3.d, #180
+; CHECK-NEXT: mov z0.d, z24.d
+; CHECK-NEXT: mov z1.d, z25.d
; CHECK-NEXT: mov z2.d, z27.d
+; CHECK-NEXT: mov z3.d, z26.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %a)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll
index b4425c0c01e17..407da6cd6002b 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll
@@ -8,24 +8,24 @@ target triple = "aarch64"
define <vscale x 4 x double> @complex_mul_const(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
; CHECK-LABEL: complex_mul_const:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z5.d, #0 // =0x0
; CHECK-NEXT: mov z4.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fmov z7.d, #3.00000000
-; CHECK-NEXT: fmov z24.d, #11.00000000
-; CHECK-NEXT: mov z6.d, z4.d
-; CHECK-NEXT: mov z5.d, z4.d
-; CHECK-NEXT: fcmla z6.d, p0/m, z1.d, z3.d, #0
-; CHECK-NEXT: fcmla z5.d, p0/m, z0.d, z2.d, #0
-; CHECK-NEXT: fcmla z6.d, p0/m, z1.d, z3.d, #90
-; CHECK-NEXT: zip2 z1.d, z24.d, z7.d
-; CHECK-NEXT: fcmla z5.d, p0/m, z0.d, z2.d, #90
-; CHECK-NEXT: zip1 z2.d, z24.d, z7.d
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: fcmla z4.d, p0/m, z6.d, z1.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z2.d, #0
-; CHECK-NEXT: fcmla z4.d, p0/m, z6.d, z1.d, #90
-; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z2.d, #90
-; CHECK-NEXT: mov z1.d, z4.d
+; CHECK-NEXT: fmov z6.d, #3.00000000
+; CHECK-NEXT: fmov z7.d, #11.00000000
+; CHECK-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #0
+; CHECK-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #0
+; CHECK-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #90
+; CHECK-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #90
+; CHECK-NEXT: mov z2.d, #0 // =0x0
+; CHECK-NEXT: zip2 z1.d, z7.d, z6.d
+; CHECK-NEXT: zip1 z3.d, z7.d, z6.d
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #0
+; CHECK-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #90
+; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #90
+; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -56,25 +56,24 @@ define <vscale x 4 x double> @complex_mul_non_const(<vscale x 4 x double> %a, <v
; CHECK-LABEL: complex_mul_non_const:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z6.d, #0 // =0x0
-; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z7.d, #0 // =0x0
; CHECK-NEXT: // kill: def $d5 killed $d5 def $z5
; CHECK-NEXT: // kill: def $d4 killed $d4 def $z4
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z5.d, d5
; CHECK-NEXT: mov z4.d, d4
-; CHECK-NEXT: mov z24.d, z6.d
-; CHECK-NEXT: mov z7.d, z6.d
-; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
-; CHECK-NEXT: fcmla z7.d, p0/m, z0.d, z2.d, #0
-; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
-; CHECK-NEXT: zip2 z1.d, z4.d, z5.d
-; CHECK-NEXT: fcmla z7.d, p0/m, z0.d, z2.d, #90
+; CHECK-NEXT: fcmla z6.d, p0/m, z0.d, z2.d, #0
+; CHECK-NEXT: fcmla z7.d, p0/m, z1.d, z3.d, #0
+; CHECK-NEXT: zip2 z24.d, z4.d, z5.d
+; CHECK-NEXT: fcmla z6.d, p0/m, z0.d, z2.d, #90
+; CHECK-NEXT: fcmla z7.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: zip1 z2.d, z4.d, z5.d
-; CHECK-NEXT: mov z0.d, z6.d
-; CHECK-NEXT: fcmla z6.d, p0/m, z24.d, z1.d, #0
-; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z2.d, #0
-; CHECK-NEXT: fcmla z6.d, p0/m, z24.d, z1.d, #90
-; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z2.d, #90
-; CHECK-NEXT: mov z1.d, z6.d
+; CHECK-NEXT: mov z1.d, #0 // =0x0
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: fcmla z0.d, p0/m, z6.d, z2.d, #0
+; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z24.d, #0
+; CHECK-NEXT: fcmla z0.d, p0/m, z6.d, z2.d, #90
+; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z24.d, #90
; CHECK-NEXT: ret
entry:
%c.coerce.fca.0.extract = extractvalue [2 x double] %c, 0
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
index 48fbd14bd8540..1960987cce4ce 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
@@ -438,21 +438,20 @@ define <vscale x 2 x double> @extract_col_q_v2f64(<vscale x 2 x double> %zd, <vs
define <vscale x 4 x i32> @test_sink_offset_operand(<vscale x 4 x i1> %pg, i32 %base, i32 %N) {
; CHECK-LABEL: test_sink_offset_operand:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z0.s, #0 // =0x0
; CHECK-NEXT: mov w12, w0
; CHECK-NEXT: .LBB26_1: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: mov z2.d, z0.d
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: mov z1.s, #0 // =0x0
; CHECK-NEXT: subs w1, w1, #3
-; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: mov z1.s, p0/m, za0h.s[w12, 0]
-; CHECK-NEXT: mov z2.s, p0/m, za0h.s[w12, 1]
-; CHECK-NEXT: mov z3.s, p0/m, za0h.s[w12, 2]
+; CHECK-NEXT: mov z2.s, #0 // =0x0
+; CHECK-NEXT: mov z0.s, p0/m, za0h.s[w12, 0]
+; CHECK-NEXT: mov z1.s, p0/m, za0h.s[w12, 1]
+; CHECK-NEXT: mov z2.s, p0/m, za0h.s[w12, 2]
; CHECK-NEXT: b.ne .LBB26_1
; CHECK-NEXT: // %bb.2: // %exit
-; CHECK-NEXT: add z0.s, z1.s, z2.s
-; CHECK-NEXT: add z0.s, z0.s, z3.s
+; CHECK-NEXT: add z0.s, z0.s, z1.s
+; CHECK-NEXT: add z0.s, z0.s, z2.s
; CHECK-NEXT: ret
entry:
%add1 = add i32 %base, 1
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
index 3fa7eca02c351..0c47e7e14183a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
@@ -29,58 +29,58 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
; CHECK-NEXT: tbnz w1, #0, .LBB1_2
; CHECK-NEXT: // %bb.1: // %vector.body
; CHECK-NEXT: mov z0.b, #0 // =0x0
+; CHECK-NEXT: mov z1.b, #0 // =0x0
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: umov w8, v0.b[8]
-; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: mov v1.b[1], v0.b[1]
; CHECK-NEXT: fmov s2, w8
-; CHECK-NEXT: mov v2.b[1], v0.b[9]
; CHECK-NEXT: mov v1.b[2], v0.b[2]
-; CHECK-NEXT: mov v2.b[2], v0.b[10]
+; CHECK-NEXT: mov v2.b[1], v0.b[9]
; CHECK-NEXT: mov v1.b[3], v0.b[3]
-; CHECK-NEXT: mov v2.b[3], v0.b[11]
+; CHECK-NEXT: mov v2.b[2], v0.b[10]
; CHECK-NEXT: mov v1.b[4], v0.b[4]
-; CHECK-NEXT: mov v2.b[4], v0.b[12]
+; CHECK-NEXT: mov v2.b[3], v0.b[11]
; CHECK-NEXT: mov v1.b[5], v0.b[5]
-; CHECK-NEXT: mov v2.b[5], v0.b[13]
+; CHECK-NEXT: mov v2.b[4], v0.b[12]
; CHECK-NEXT: mov v1.b[6], v0.b[6]
-; CHECK-NEXT: mov v2.b[6], v0.b[14]
+; CHECK-NEXT: mov v2.b[5], v0.b[13]
; CHECK-NEXT: mov v1.b[7], v0.b[7]
+; CHECK-NEXT: mov v2.b[6], v0.b[14]
+; CHECK-NEXT: uunpklo z1.h, z1.b
; CHECK-NEXT: mov v2.b[7], v0.b[15]
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
-; CHECK-NEXT: uunpklo z1.h, z1.b
+; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpklo z2.h, z2.b
-; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: lsl z1.s, z1.s, #31
; CHECK-NEXT: uunpklo z3.h, z3.b
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z2.s, z2.h
-; CHECK-NEXT: lsl z1.s, z1.s, #31
+; CHECK-NEXT: asr z1.s, z1.s, #31
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: lsl z0.s, z0.s, #31
-; CHECK-NEXT: asr z1.s, z1.s, #31
+; CHECK-NEXT: and z1.s, z1.s, #0x1
; CHECK-NEXT: lsl z2.s, z2.s, #31
; CHECK-NEXT: asr z0.s, z0.s, #31
-; CHECK-NEXT: and z1.s, z1.s, #0x1
+; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT: lsl z3.s, z3.s, #31
; CHECK-NEXT: asr z2.s, z2.s, #31
; CHECK-NEXT: and z0.s, z0.s, #0x1
-; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
-; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT: asr z3.s, z3.s, #31
; CHECK-NEXT: and z2.s, z2.s, #0x1
+; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: and z3.s, z3.s, #0x1
; CHECK-NEXT: cmpne p4.s, p0/z, z2.s, #0
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
+; CHECK-NEXT: st1w { z1.s }, p0, [x0]
; CHECK-NEXT: cmpne p3.s, p0/z, z3.s, #0
; CHECK-NEXT: ld1w { z3.s }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0
; CHECK-NEXT: mov z2.s, p4/m, #0 // =0x0
-; CHECK-NEXT: st1w { z1.s }, p0, [x0]
; CHECK-NEXT: st1w { z0.s }, p0, [x0, #2, mul vl]
; CHECK-NEXT: mov z3.s, p3/m, #0 // =0x0
; CHECK-NEXT: st1w { z2.s }, p0, [x0, #1, mul vl]
diff --git a/llvm/test/CodeGen/AArch64/sve-pr92779.ll b/llvm/test/CodeGen/AArch64/sve-pr92779.ll
index 480f41eb0f81b..1bb7801b6a047 100644
--- a/llvm/test/CodeGen/AArch64/sve-pr92779.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pr92779.ll
@@ -5,16 +5,16 @@ define void @main(ptr %0) {
; CHECK-LABEL: main:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: mov z1.d, z0.d
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: uzp1 v1.2s, v0.2s, v1.2s
-; CHECK-NEXT: neg v1.2s, v1.2s
-; CHECK-NEXT: smov x8, v1.s[0]
-; CHECK-NEXT: smov x9, v1.s[1]
-; CHECK-NEXT: mov z0.d, p0/m, x8
-; CHECK-NEXT: mov z0.d, p0/m, x9
-; CHECK-NEXT: str z0, [x0]
+; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: uzp1 v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: neg v0.2s, v0.2s
+; CHECK-NEXT: smov x8, v0.s[0]
+; CHECK-NEXT: smov x9, v0.s[1]
+; CHECK-NEXT: mov z1.d, p0/m, x8
+; CHECK-NEXT: mov z1.d, p0/m, x9
+; CHECK-NEXT: str z1, [x0]
; CHECK-NEXT: ret
"entry":
%1 = bitcast <vscale x 2 x i64> zeroinitializer to <vscale x 4 x i32>
diff --git a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
index 5c84551432909..2fe09f8ac7c5d 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
@@ -331,12 +331,12 @@ define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
define <vscale x 4 x double> @scvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: scvtf_d_nxv4i1:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: fmov z1.d, p0/m, #-1.00000000
; CHECK-NEXT: fmov z0.d, p1/m, #-1.00000000
+; CHECK-NEXT: fmov z1.d, p0/m, #-1.00000000
; CHECK-NEXT: ret
%res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
ret <vscale x 4 x double> %res
@@ -392,12 +392,12 @@ define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
define <vscale x 4 x double> @ucvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: ucvtf_d_nxv4i1:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: mov z1.d, #0 // =0x0
; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: punpkhi p0.h, p0.b
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: fmov z1.d, p0/m, #1.00000000
; CHECK-NEXT: fmov z0.d, p1/m, #1.00000000
+; CHECK-NEXT: fmov z1.d, p0/m, #1.00000000
; CHECK-NEXT: ret
%res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
ret <vscale x 4 x double> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
index 275d13ebfd949..ad00e99b704dd 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
@@ -419,7 +419,6 @@ define <1 x i64> @insertelement_v1i64(<1 x i64> %op1) {
; CHECK-LABEL: insertelement_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, #5 // =0x5
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: insertelement_v1i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
index 9c7a3d5046d0e..37435e35ceabf 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
@@ -39,7 +39,6 @@ define <2 x i64> @fixed_vec_zero_constant() {
; CHECK-LABEL: fixed_vec_zero_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, #0 // =0x0
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fixed_vec_zero_constant:
@@ -54,7 +53,6 @@ define <2 x double> @fixed_vec_fp_zero_constant() {
; CHECK-LABEL: fixed_vec_fp_zero_constant:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z0.d, #0 // =0x0
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: fixed_vec_fp_zero_constant:
More information about the llvm-commits mailing list