[clang-tools-extra] [AArch64][CodeGen] Fix wrong operand order when creating vcmla intrinsic (PR #65278)

via cfe-commits cfe-commits at lists.llvm.org
Fri Sep 8 06:18:23 PDT 2023


https://github.com/daisy202309 updated https://github.com/llvm/llvm-project/pull/65278:

>From 9b68bfc6078a400247a55d181a6bc8ff73819cd4 Mon Sep 17 00:00:00 2001
From: lizhijin <lizhijin3 at huawei.com>
Date: Tue, 5 Sep 2023 00:56:23 +0800
Subject: [PATCH] [AArch64][CodeGen] Fix wrong operand order when creating
 vcmla intrinsic

---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  2 +-
 ...-deinterleaving-add-mull-fixed-contract.ll | 48 ++++++++--------
 ...plex-deinterleaving-add-mull-fixed-fast.ll | 56 +++++++++----------
 .../AArch64/complex-deinterleaving-f16-mul.ll | 32 +++++------
 .../AArch64/complex-deinterleaving-f32-mul.ll | 32 +++++------
 .../AArch64/complex-deinterleaving-f64-mul.ll | 28 +++++-----
 .../complex-deinterleaving-mixed-cases.ll     | 56 +++++++++----------
 .../complex-deinterleaving-multiuses.ll       | 46 +++++++--------
 .../complex-deinterleaving-reductions.ll      | 32 +++++------
 .../AArch64/complex-deinterleaving-splat.ll   | 32 +++++------
 .../complex-deinterleaving-uniform-cases.ll   | 16 +++---
 11 files changed, 190 insertions(+), 190 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 16fe5bc2786318c..e37e64c4ca16925 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26123,7 +26123,7 @@ Value *AArch64TargetLowering::createComplexDeinterleavingIR(
 
 
     return B.CreateIntrinsic(IdMap[(int)Rotation], Ty,
-                             {Accumulator, InputB, InputA});
+                             {Accumulator, InputA, InputB});
   }
 
   if (OperationType == ComplexDeinterleavingOperation::CAdd) {
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-contract.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-contract.ll
index c684a18a7e0773c..09672d1be216136 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-contract.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-contract.ll
@@ -48,14 +48,14 @@ define <4 x double> @mul_add_mull(<4 x double> %a, <4 x double> %b, <4 x double>
 ; CHECK-NEXT:    movi v17.2d, #0000000000000000
 ; CHECK-NEXT:    movi v18.2d, #0000000000000000
 ; CHECK-NEXT:    movi v19.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v16.2d, v0.2d, v2.2d, #0
-; CHECK-NEXT:    fcmla v18.2d, v1.2d, v3.2d, #0
-; CHECK-NEXT:    fcmla v17.2d, v5.2d, v7.2d, #0
-; CHECK-NEXT:    fcmla v19.2d, v4.2d, v6.2d, #0
-; CHECK-NEXT:    fcmla v16.2d, v0.2d, v2.2d, #90
-; CHECK-NEXT:    fcmla v18.2d, v1.2d, v3.2d, #90
-; CHECK-NEXT:    fcmla v17.2d, v5.2d, v7.2d, #90
-; CHECK-NEXT:    fcmla v19.2d, v4.2d, v6.2d, #90
+; CHECK-NEXT:    fcmla v16.2d, v2.2d, v0.2d, #0
+; CHECK-NEXT:    fcmla v18.2d, v3.2d, v1.2d, #0
+; CHECK-NEXT:    fcmla v17.2d, v7.2d, v5.2d, #0
+; CHECK-NEXT:    fcmla v19.2d, v6.2d, v4.2d, #0
+; CHECK-NEXT:    fcmla v16.2d, v2.2d, v0.2d, #90
+; CHECK-NEXT:    fcmla v18.2d, v3.2d, v1.2d, #90
+; CHECK-NEXT:    fcmla v17.2d, v7.2d, v5.2d, #90
+; CHECK-NEXT:    fcmla v19.2d, v6.2d, v4.2d, #90
 ; CHECK-NEXT:    fadd v1.2d, v18.2d, v17.2d
 ; CHECK-NEXT:    fadd v0.2d, v16.2d, v19.2d
 ; CHECK-NEXT:    ret
@@ -94,14 +94,14 @@ define <4 x double> @mul_sub_mull(<4 x double> %a, <4 x double> %b, <4 x double>
 ; CHECK-NEXT:    movi v17.2d, #0000000000000000
 ; CHECK-NEXT:    movi v18.2d, #0000000000000000
 ; CHECK-NEXT:    movi v19.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v16.2d, v0.2d, v2.2d, #0
-; CHECK-NEXT:    fcmla v18.2d, v1.2d, v3.2d, #0
-; CHECK-NEXT:    fcmla v17.2d, v5.2d, v7.2d, #0
-; CHECK-NEXT:    fcmla v19.2d, v4.2d, v6.2d, #0
-; CHECK-NEXT:    fcmla v16.2d, v0.2d, v2.2d, #90
-; CHECK-NEXT:    fcmla v18.2d, v1.2d, v3.2d, #90
-; CHECK-NEXT:    fcmla v17.2d, v5.2d, v7.2d, #90
-; CHECK-NEXT:    fcmla v19.2d, v4.2d, v6.2d, #90
+; CHECK-NEXT:    fcmla v16.2d, v2.2d, v0.2d, #0
+; CHECK-NEXT:    fcmla v18.2d, v3.2d, v1.2d, #0
+; CHECK-NEXT:    fcmla v17.2d, v7.2d, v5.2d, #0
+; CHECK-NEXT:    fcmla v19.2d, v6.2d, v4.2d, #0
+; CHECK-NEXT:    fcmla v16.2d, v2.2d, v0.2d, #90
+; CHECK-NEXT:    fcmla v18.2d, v3.2d, v1.2d, #90
+; CHECK-NEXT:    fcmla v17.2d, v7.2d, v5.2d, #90
+; CHECK-NEXT:    fcmla v19.2d, v6.2d, v4.2d, #90
 ; CHECK-NEXT:    fsub v1.2d, v18.2d, v17.2d
 ; CHECK-NEXT:    fsub v0.2d, v16.2d, v19.2d
 ; CHECK-NEXT:    ret
@@ -140,14 +140,14 @@ define <4 x double> @mul_conj_mull(<4 x double> %a, <4 x double> %b, <4 x double
 ; CHECK-NEXT:    movi v17.2d, #0000000000000000
 ; CHECK-NEXT:    movi v18.2d, #0000000000000000
 ; CHECK-NEXT:    movi v19.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v16.2d, v0.2d, v2.2d, #0
-; CHECK-NEXT:    fcmla v18.2d, v1.2d, v3.2d, #0
-; CHECK-NEXT:    fcmla v17.2d, v7.2d, v5.2d, #0
-; CHECK-NEXT:    fcmla v19.2d, v6.2d, v4.2d, #0
-; CHECK-NEXT:    fcmla v16.2d, v0.2d, v2.2d, #90
-; CHECK-NEXT:    fcmla v18.2d, v1.2d, v3.2d, #90
-; CHECK-NEXT:    fcmla v17.2d, v7.2d, v5.2d, #270
-; CHECK-NEXT:    fcmla v19.2d, v6.2d, v4.2d, #270
+; CHECK-NEXT:    fcmla v16.2d, v2.2d, v0.2d, #0
+; CHECK-NEXT:    fcmla v18.2d, v3.2d, v1.2d, #0
+; CHECK-NEXT:    fcmla v17.2d, v5.2d, v7.2d, #0
+; CHECK-NEXT:    fcmla v19.2d, v4.2d, v6.2d, #0
+; CHECK-NEXT:    fcmla v16.2d, v2.2d, v0.2d, #90
+; CHECK-NEXT:    fcmla v18.2d, v3.2d, v1.2d, #90
+; CHECK-NEXT:    fcmla v17.2d, v5.2d, v7.2d, #270
+; CHECK-NEXT:    fcmla v19.2d, v4.2d, v6.2d, #270
 ; CHECK-NEXT:    fadd v1.2d, v18.2d, v17.2d
 ; CHECK-NEXT:    fadd v0.2d, v16.2d, v19.2d
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-fast.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-fast.ll
index 9b6a9e7adf796ff..7692b1cf0aaae1f 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-fast.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-fast.ll
@@ -7,10 +7,10 @@ target triple = "aarch64"
 define <4 x double> @mull_add(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
 ; CHECK-LABEL: mull_add:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcmla v4.2d, v2.2d, v0.2d, #0
-; CHECK-NEXT:    fcmla v5.2d, v3.2d, v1.2d, #0
-; CHECK-NEXT:    fcmla v4.2d, v2.2d, v0.2d, #90
-; CHECK-NEXT:    fcmla v5.2d, v3.2d, v1.2d, #90
+; CHECK-NEXT:    fcmla v4.2d, v0.2d, v2.2d, #0
+; CHECK-NEXT:    fcmla v5.2d, v1.2d, v3.2d, #0
+; CHECK-NEXT:    fcmla v4.2d, v0.2d, v2.2d, #90
+; CHECK-NEXT:    fcmla v5.2d, v1.2d, v3.2d, #90
 ; CHECK-NEXT:    mov v0.16b, v4.16b
 ; CHECK-NEXT:    mov v1.16b, v5.16b
 ; CHECK-NEXT:    ret
@@ -39,14 +39,14 @@ define <4 x double> @mul_add_mull(<4 x double> %a, <4 x double> %b, <4 x double>
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v16.2d, #0000000000000000
 ; CHECK-NEXT:    movi v17.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v17.2d, v4.2d, v6.2d, #0
-; CHECK-NEXT:    fcmla v16.2d, v5.2d, v7.2d, #0
-; CHECK-NEXT:    fcmla v17.2d, v2.2d, v0.2d, #0
-; CHECK-NEXT:    fcmla v16.2d, v3.2d, v1.2d, #0
-; CHECK-NEXT:    fcmla v17.2d, v4.2d, v6.2d, #90
-; CHECK-NEXT:    fcmla v16.2d, v5.2d, v7.2d, #90
-; CHECK-NEXT:    fcmla v17.2d, v2.2d, v0.2d, #90
-; CHECK-NEXT:    fcmla v16.2d, v3.2d, v1.2d, #90
+; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #0
+; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #0
+; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #0
+; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #0
+; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #90
+; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #90
+; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #90
+; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #90
 ; CHECK-NEXT:    mov v0.16b, v17.16b
 ; CHECK-NEXT:    mov v1.16b, v16.16b
 ; CHECK-NEXT:    ret
@@ -83,14 +83,14 @@ define <4 x double> @mul_sub_mull(<4 x double> %a, <4 x double> %b, <4 x double>
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v16.2d, #0000000000000000
 ; CHECK-NEXT:    movi v17.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v17.2d, v4.2d, v6.2d, #270
-; CHECK-NEXT:    fcmla v16.2d, v5.2d, v7.2d, #270
-; CHECK-NEXT:    fcmla v17.2d, v2.2d, v0.2d, #0
-; CHECK-NEXT:    fcmla v16.2d, v3.2d, v1.2d, #0
-; CHECK-NEXT:    fcmla v17.2d, v4.2d, v6.2d, #180
-; CHECK-NEXT:    fcmla v16.2d, v5.2d, v7.2d, #180
-; CHECK-NEXT:    fcmla v17.2d, v2.2d, v0.2d, #90
-; CHECK-NEXT:    fcmla v16.2d, v3.2d, v1.2d, #90
+; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #270
+; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #270
+; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #0
+; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #0
+; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #180
+; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #180
+; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #90
+; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #90
 ; CHECK-NEXT:    mov v0.16b, v17.16b
 ; CHECK-NEXT:    mov v1.16b, v16.16b
 ; CHECK-NEXT:    ret
@@ -127,14 +127,14 @@ define <4 x double> @mul_conj_mull(<4 x double> %a, <4 x double> %b, <4 x double
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v16.2d, #0000000000000000
 ; CHECK-NEXT:    movi v17.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v17.2d, v2.2d, v0.2d, #0
-; CHECK-NEXT:    fcmla v16.2d, v3.2d, v1.2d, #0
-; CHECK-NEXT:    fcmla v17.2d, v2.2d, v0.2d, #90
-; CHECK-NEXT:    fcmla v16.2d, v3.2d, v1.2d, #90
-; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #0
-; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #0
-; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #270
-; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #270
+; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #0
+; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #0
+; CHECK-NEXT:    fcmla v17.2d, v0.2d, v2.2d, #90
+; CHECK-NEXT:    fcmla v16.2d, v1.2d, v3.2d, #90
+; CHECK-NEXT:    fcmla v17.2d, v4.2d, v6.2d, #0
+; CHECK-NEXT:    fcmla v16.2d, v5.2d, v7.2d, #0
+; CHECK-NEXT:    fcmla v17.2d, v4.2d, v6.2d, #270
+; CHECK-NEXT:    fcmla v16.2d, v5.2d, v7.2d, #270
 ; CHECK-NEXT:    mov v0.16b, v17.16b
 ; CHECK-NEXT:    mov v1.16b, v16.16b
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
index 40433e2e076aa74..fbe913e5472cc23 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
@@ -37,8 +37,8 @@ define <4 x half> @complex_mul_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-LABEL: complex_mul_v4f16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi d2, #0000000000000000
-; CHECK-NEXT:    fcmla v2.4h, v0.4h, v1.4h, #0
-; CHECK-NEXT:    fcmla v2.4h, v0.4h, v1.4h, #90
+; CHECK-NEXT:    fcmla v2.4h, v1.4h, v0.4h, #0
+; CHECK-NEXT:    fcmla v2.4h, v1.4h, v0.4h, #90
 ; CHECK-NEXT:    fmov d0, d2
 ; CHECK-NEXT:    ret
 entry:
@@ -61,8 +61,8 @@ define <8 x half> @complex_mul_v8f16(<8 x half> %a, <8 x half> %b) {
 ; CHECK-LABEL: complex_mul_v8f16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v2.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v2.8h, v0.8h, v1.8h, #0
-; CHECK-NEXT:    fcmla v2.8h, v0.8h, v1.8h, #90
+; CHECK-NEXT:    fcmla v2.8h, v1.8h, v0.8h, #0
+; CHECK-NEXT:    fcmla v2.8h, v1.8h, v0.8h, #90
 ; CHECK-NEXT:    mov v0.16b, v2.16b
 ; CHECK-NEXT:    ret
 entry:
@@ -86,10 +86,10 @@ define <16 x half> @complex_mul_v16f16(<16 x half> %a, <16 x half> %b) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v4.2d, #0000000000000000
 ; CHECK-NEXT:    movi v5.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v5.8h, v0.8h, v2.8h, #0
-; CHECK-NEXT:    fcmla v4.8h, v1.8h, v3.8h, #0
-; CHECK-NEXT:    fcmla v5.8h, v0.8h, v2.8h, #90
-; CHECK-NEXT:    fcmla v4.8h, v1.8h, v3.8h, #90
+; CHECK-NEXT:    fcmla v5.8h, v2.8h, v0.8h, #0
+; CHECK-NEXT:    fcmla v4.8h, v3.8h, v1.8h, #0
+; CHECK-NEXT:    fcmla v5.8h, v2.8h, v0.8h, #90
+; CHECK-NEXT:    fcmla v4.8h, v3.8h, v1.8h, #90
 ; CHECK-NEXT:    mov v0.16b, v5.16b
 ; CHECK-NEXT:    mov v1.16b, v4.16b
 ; CHECK-NEXT:    ret
@@ -116,14 +116,14 @@ define <32 x half> @complex_mul_v32f16(<32 x half> %a, <32 x half> %b) {
 ; CHECK-NEXT:    movi v17.2d, #0000000000000000
 ; CHECK-NEXT:    movi v18.2d, #0000000000000000
 ; CHECK-NEXT:    movi v19.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v16.8h, v0.8h, v4.8h, #0
-; CHECK-NEXT:    fcmla v18.8h, v1.8h, v5.8h, #0
-; CHECK-NEXT:    fcmla v17.8h, v3.8h, v7.8h, #0
-; CHECK-NEXT:    fcmla v19.8h, v2.8h, v6.8h, #0
-; CHECK-NEXT:    fcmla v16.8h, v0.8h, v4.8h, #90
-; CHECK-NEXT:    fcmla v18.8h, v1.8h, v5.8h, #90
-; CHECK-NEXT:    fcmla v17.8h, v3.8h, v7.8h, #90
-; CHECK-NEXT:    fcmla v19.8h, v2.8h, v6.8h, #90
+; CHECK-NEXT:    fcmla v16.8h, v4.8h, v0.8h, #0
+; CHECK-NEXT:    fcmla v18.8h, v5.8h, v1.8h, #0
+; CHECK-NEXT:    fcmla v17.8h, v7.8h, v3.8h, #0
+; CHECK-NEXT:    fcmla v19.8h, v6.8h, v2.8h, #0
+; CHECK-NEXT:    fcmla v16.8h, v4.8h, v0.8h, #90
+; CHECK-NEXT:    fcmla v18.8h, v5.8h, v1.8h, #90
+; CHECK-NEXT:    fcmla v17.8h, v7.8h, v3.8h, #90
+; CHECK-NEXT:    fcmla v19.8h, v6.8h, v2.8h, #90
 ; CHECK-NEXT:    mov v0.16b, v16.16b
 ; CHECK-NEXT:    mov v1.16b, v18.16b
 ; CHECK-NEXT:    mov v3.16b, v17.16b
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul.ll
index 05f07f6fd1c2c89..5f30d9642ce8b00 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul.ll
@@ -8,8 +8,8 @@ define <2 x float> @complex_mul_v2f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: complex_mul_v2f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi d2, #0000000000000000
-; CHECK-NEXT:    fcmla v2.2s, v0.2s, v1.2s, #0
-; CHECK-NEXT:    fcmla v2.2s, v0.2s, v1.2s, #90
+; CHECK-NEXT:    fcmla v2.2s, v1.2s, v0.2s, #0
+; CHECK-NEXT:    fcmla v2.2s, v1.2s, v0.2s, #90
 ; CHECK-NEXT:    fmov d0, d2
 ; CHECK-NEXT:    ret
 entry:
@@ -32,8 +32,8 @@ define <4 x float> @complex_mul_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: complex_mul_v4f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v2.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v2.4s, v0.4s, v1.4s, #0
-; CHECK-NEXT:    fcmla v2.4s, v0.4s, v1.4s, #90
+; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #0
+; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #90
 ; CHECK-NEXT:    mov v0.16b, v2.16b
 ; CHECK-NEXT:    ret
 entry:
@@ -57,10 +57,10 @@ define <8 x float> @complex_mul_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v4.2d, #0000000000000000
 ; CHECK-NEXT:    movi v5.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v5.4s, v0.4s, v2.4s, #0
-; CHECK-NEXT:    fcmla v4.4s, v1.4s, v3.4s, #0
-; CHECK-NEXT:    fcmla v5.4s, v0.4s, v2.4s, #90
-; CHECK-NEXT:    fcmla v4.4s, v1.4s, v3.4s, #90
+; CHECK-NEXT:    fcmla v5.4s, v2.4s, v0.4s, #0
+; CHECK-NEXT:    fcmla v4.4s, v3.4s, v1.4s, #0
+; CHECK-NEXT:    fcmla v5.4s, v2.4s, v0.4s, #90
+; CHECK-NEXT:    fcmla v4.4s, v3.4s, v1.4s, #90
 ; CHECK-NEXT:    mov v0.16b, v5.16b
 ; CHECK-NEXT:    mov v1.16b, v4.16b
 ; CHECK-NEXT:    ret
@@ -87,14 +87,14 @@ define <16 x float> @complex_mul_v16f32(<16 x float> %a, <16 x float> %b) {
 ; CHECK-NEXT:    movi v17.2d, #0000000000000000
 ; CHECK-NEXT:    movi v18.2d, #0000000000000000
 ; CHECK-NEXT:    movi v19.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v16.4s, v0.4s, v4.4s, #0
-; CHECK-NEXT:    fcmla v18.4s, v1.4s, v5.4s, #0
-; CHECK-NEXT:    fcmla v17.4s, v3.4s, v7.4s, #0
-; CHECK-NEXT:    fcmla v19.4s, v2.4s, v6.4s, #0
-; CHECK-NEXT:    fcmla v16.4s, v0.4s, v4.4s, #90
-; CHECK-NEXT:    fcmla v18.4s, v1.4s, v5.4s, #90
-; CHECK-NEXT:    fcmla v17.4s, v3.4s, v7.4s, #90
-; CHECK-NEXT:    fcmla v19.4s, v2.4s, v6.4s, #90
+; CHECK-NEXT:    fcmla v16.4s, v4.4s, v0.4s, #0
+; CHECK-NEXT:    fcmla v18.4s, v5.4s, v1.4s, #0
+; CHECK-NEXT:    fcmla v17.4s, v7.4s, v3.4s, #0
+; CHECK-NEXT:    fcmla v19.4s, v6.4s, v2.4s, #0
+; CHECK-NEXT:    fcmla v16.4s, v4.4s, v0.4s, #90
+; CHECK-NEXT:    fcmla v18.4s, v5.4s, v1.4s, #90
+; CHECK-NEXT:    fcmla v17.4s, v7.4s, v3.4s, #90
+; CHECK-NEXT:    fcmla v19.4s, v6.4s, v2.4s, #90
 ; CHECK-NEXT:    mov v0.16b, v16.16b
 ; CHECK-NEXT:    mov v1.16b, v18.16b
 ; CHECK-NEXT:    mov v3.16b, v17.16b
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul.ll
index 6df59951e2143c3..6d7b156c3b64c94 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul.ll
@@ -8,8 +8,8 @@ define <2 x double> @complex_mul_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: complex_mul_v2f64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v2.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v2.2d, v0.2d, v1.2d, #0
-; CHECK-NEXT:    fcmla v2.2d, v0.2d, v1.2d, #90
+; CHECK-NEXT:    fcmla v2.2d, v1.2d, v0.2d, #0
+; CHECK-NEXT:    fcmla v2.2d, v1.2d, v0.2d, #90
 ; CHECK-NEXT:    mov v0.16b, v2.16b
 ; CHECK-NEXT:    ret
 entry:
@@ -33,10 +33,10 @@ define <4 x double> @complex_mul_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v4.2d, #0000000000000000
 ; CHECK-NEXT:    movi v5.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v5.2d, v0.2d, v2.2d, #0
-; CHECK-NEXT:    fcmla v4.2d, v1.2d, v3.2d, #0
-; CHECK-NEXT:    fcmla v5.2d, v0.2d, v2.2d, #90
-; CHECK-NEXT:    fcmla v4.2d, v1.2d, v3.2d, #90
+; CHECK-NEXT:    fcmla v5.2d, v2.2d, v0.2d, #0
+; CHECK-NEXT:    fcmla v4.2d, v3.2d, v1.2d, #0
+; CHECK-NEXT:    fcmla v5.2d, v2.2d, v0.2d, #90
+; CHECK-NEXT:    fcmla v4.2d, v3.2d, v1.2d, #90
 ; CHECK-NEXT:    mov v0.16b, v5.16b
 ; CHECK-NEXT:    mov v1.16b, v4.16b
 ; CHECK-NEXT:    ret
@@ -63,14 +63,14 @@ define <8 x double> @complex_mul_v8f64(<8 x double> %a, <8 x double> %b) {
 ; CHECK-NEXT:    movi v17.2d, #0000000000000000
 ; CHECK-NEXT:    movi v18.2d, #0000000000000000
 ; CHECK-NEXT:    movi v19.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v16.2d, v0.2d, v4.2d, #0
-; CHECK-NEXT:    fcmla v18.2d, v1.2d, v5.2d, #0
-; CHECK-NEXT:    fcmla v17.2d, v3.2d, v7.2d, #0
-; CHECK-NEXT:    fcmla v19.2d, v2.2d, v6.2d, #0
-; CHECK-NEXT:    fcmla v16.2d, v0.2d, v4.2d, #90
-; CHECK-NEXT:    fcmla v18.2d, v1.2d, v5.2d, #90
-; CHECK-NEXT:    fcmla v17.2d, v3.2d, v7.2d, #90
-; CHECK-NEXT:    fcmla v19.2d, v2.2d, v6.2d, #90
+; CHECK-NEXT:    fcmla v16.2d, v4.2d, v0.2d, #0
+; CHECK-NEXT:    fcmla v18.2d, v5.2d, v1.2d, #0
+; CHECK-NEXT:    fcmla v17.2d, v7.2d, v3.2d, #0
+; CHECK-NEXT:    fcmla v19.2d, v6.2d, v2.2d, #0
+; CHECK-NEXT:    fcmla v16.2d, v4.2d, v0.2d, #90
+; CHECK-NEXT:    fcmla v18.2d, v5.2d, v1.2d, #90
+; CHECK-NEXT:    fcmla v17.2d, v7.2d, v3.2d, #90
+; CHECK-NEXT:    fcmla v19.2d, v6.2d, v2.2d, #90
 ; CHECK-NEXT:    mov v0.16b, v16.16b
 ; CHECK-NEXT:    mov v1.16b, v18.16b
 ; CHECK-NEXT:    mov v3.16b, v17.16b
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll
index f7837b2367671d8..1ed9cf2db24f729 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll
@@ -9,10 +9,10 @@ define <4 x float> @mul_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v4.2d, #0000000000000000
 ; CHECK-NEXT:    movi v3.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v4.4s, v0.4s, v1.4s, #0
-; CHECK-NEXT:    fcmla v4.4s, v0.4s, v1.4s, #90
-; CHECK-NEXT:    fcmla v3.4s, v4.4s, v2.4s, #0
-; CHECK-NEXT:    fcmla v3.4s, v4.4s, v2.4s, #90
+; CHECK-NEXT:    fcmla v4.4s, v1.4s, v0.4s, #0
+; CHECK-NEXT:    fcmla v4.4s, v1.4s, v0.4s, #90
+; CHECK-NEXT:    fcmla v3.4s, v2.4s, v4.4s, #0
+; CHECK-NEXT:    fcmla v3.4s, v2.4s, v4.4s, #90
 ; CHECK-NEXT:    mov v0.16b, v3.16b
 ; CHECK-NEXT:    ret
 entry:
@@ -130,10 +130,10 @@ define <4 x float> @mul_triangle(<4 x float> %a, <4 x float> %b) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v3.2d, #0000000000000000
 ; CHECK-NEXT:    movi v2.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v3.4s, v1.4s, v0.4s, #0
-; CHECK-NEXT:    fcmla v3.4s, v1.4s, v0.4s, #90
-; CHECK-NEXT:    fcmla v2.4s, v0.4s, v3.4s, #0
-; CHECK-NEXT:    fcmla v2.4s, v0.4s, v3.4s, #90
+; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #0
+; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #90
+; CHECK-NEXT:    fcmla v2.4s, v3.4s, v0.4s, #0
+; CHECK-NEXT:    fcmla v2.4s, v3.4s, v0.4s, #90
 ; CHECK-NEXT:    mov v0.16b, v2.16b
 ; CHECK-NEXT:    ret
 entry:
@@ -166,15 +166,15 @@ define <4 x float> @mul_diamond(<4 x float> %a, <4 x float> %b, <4 x float> %c,
 ; CHECK-NEXT:    movi v4.2d, #0000000000000000
 ; CHECK-NEXT:    movi v5.2d, #0000000000000000
 ; CHECK-NEXT:    movi v6.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v4.4s, v1.4s, v0.4s, #0
-; CHECK-NEXT:    fcmla v6.4s, v2.4s, v0.4s, #0
-; CHECK-NEXT:    fcmla v4.4s, v1.4s, v0.4s, #90
+; CHECK-NEXT:    fcmla v4.4s, v0.4s, v1.4s, #0
+; CHECK-NEXT:    fcmla v6.4s, v0.4s, v2.4s, #0
+; CHECK-NEXT:    fcmla v4.4s, v0.4s, v1.4s, #90
 ; CHECK-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v6.4s, v2.4s, v0.4s, #90
-; CHECK-NEXT:    fcmla v5.4s, v4.4s, v3.4s, #0
-; CHECK-NEXT:    fcmla v5.4s, v4.4s, v3.4s, #90
-; CHECK-NEXT:    fcmla v1.4s, v6.4s, v5.4s, #0
-; CHECK-NEXT:    fcmla v1.4s, v6.4s, v5.4s, #90
+; CHECK-NEXT:    fcmla v6.4s, v0.4s, v2.4s, #90
+; CHECK-NEXT:    fcmla v5.4s, v3.4s, v4.4s, #0
+; CHECK-NEXT:    fcmla v5.4s, v3.4s, v4.4s, #90
+; CHECK-NEXT:    fcmla v1.4s, v5.4s, v6.4s, #0
+; CHECK-NEXT:    fcmla v1.4s, v5.4s, v6.4s, #90
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
 entry:
@@ -220,10 +220,10 @@ define <4 x float> @mul_add90_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v3.2d, #0000000000000000
 ; CHECK-NEXT:    movi v4.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v4.4s, v2.4s, v0.4s, #0
-; CHECK-NEXT:    fcmla v3.4s, v1.4s, v0.4s, #0
-; CHECK-NEXT:    fcmla v4.4s, v2.4s, v0.4s, #90
-; CHECK-NEXT:    fcmla v3.4s, v1.4s, v0.4s, #90
+; CHECK-NEXT:    fcmla v4.4s, v0.4s, v2.4s, #0
+; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #0
+; CHECK-NEXT:    fcmla v4.4s, v0.4s, v2.4s, #90
+; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #90
 ; CHECK-NEXT:    fcadd v0.4s, v4.4s, v3.4s, #90
 ; CHECK-NEXT:    ret
 entry:
@@ -358,8 +358,8 @@ entry:
 define <4 x float> @mul_addequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; CHECK-LABEL: mul_addequal:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcmla v2.4s, v0.4s, v1.4s, #0
-; CHECK-NEXT:    fcmla v2.4s, v0.4s, v1.4s, #90
+; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #0
+; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #90
 ; CHECK-NEXT:    mov v0.16b, v2.16b
 ; CHECK-NEXT:    ret
 entry:
@@ -386,8 +386,8 @@ define <4 x float> @mul_subequal(<4 x float> %a, <4 x float> %b, <4 x float> %c)
 ; CHECK-LABEL: mul_subequal:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v3.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #0
-; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #90
+; CHECK-NEXT:    fcmla v3.4s, v1.4s, v0.4s, #0
+; CHECK-NEXT:    fcmla v3.4s, v1.4s, v0.4s, #90
 ; CHECK-NEXT:    fsub v0.4s, v3.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -415,8 +415,8 @@ define <4 x float> @mul_mulequal(<4 x float> %a, <4 x float> %b, <4 x float> %c)
 ; CHECK-LABEL: mul_mulequal:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v3.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #0
-; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #90
+; CHECK-NEXT:    fcmla v3.4s, v1.4s, v0.4s, #0
+; CHECK-NEXT:    fcmla v3.4s, v1.4s, v0.4s, #90
 ; CHECK-NEXT:    fmul v0.4s, v3.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -484,8 +484,8 @@ define <4 x float> @mul_negequal(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: mul_negequal:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v2.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v2.4s, v0.4s, v1.4s, #180
-; CHECK-NEXT:    fcmla v2.4s, v0.4s, v1.4s, #270
+; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #180
+; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #270
 ; CHECK-NEXT:    mov v0.16b, v2.16b
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll
index 16dec1af60c1ca0..039025dafa0d6e8 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll
@@ -10,11 +10,11 @@ define <4 x float> @mul_triangle(<4 x float> %a, <4 x float> %b, ptr %p) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v3.2d, #0000000000000000
 ; CHECK-NEXT:    movi v2.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v3.4s, v1.4s, v0.4s, #0
-; CHECK-NEXT:    fcmla v3.4s, v1.4s, v0.4s, #90
-; CHECK-NEXT:    fcmla v2.4s, v0.4s, v3.4s, #0
+; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #0
+; CHECK-NEXT:    fcmla v3.4s, v0.4s, v1.4s, #90
+; CHECK-NEXT:    fcmla v2.4s, v3.4s, v0.4s, #0
 ; CHECK-NEXT:    str q3, [x0]
-; CHECK-NEXT:    fcmla v2.4s, v0.4s, v3.4s, #90
+; CHECK-NEXT:    fcmla v2.4s, v3.4s, v0.4s, #90
 ; CHECK-NEXT:    mov v0.16b, v2.16b
 ; CHECK-NEXT:    ret
 entry:
@@ -112,10 +112,10 @@ define <4 x float> @multiple_muls_shuffle_external(<4 x float> %a, <4 x float> %
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    fmul v5.2s, v4.2s, v5.2s
 ; CHECK-NEXT:    fmla v17.2s, v1.2s, v4.2s
-; CHECK-NEXT:    fcmla v0.4s, v3.4s, v2.4s, #0
+; CHECK-NEXT:    fcmla v0.4s, v2.4s, v3.4s, #0
 ; CHECK-NEXT:    str d1, [x0]
 ; CHECK-NEXT:    fneg v16.2s, v5.2s
-; CHECK-NEXT:    fcmla v0.4s, v3.4s, v2.4s, #90
+; CHECK-NEXT:    fcmla v0.4s, v2.4s, v3.4s, #90
 ; CHECK-NEXT:    fmla v16.2s, v1.2s, v6.2s
 ; CHECK-NEXT:    st2 { v16.2s, v17.2s }, [x1]
 ; CHECK-NEXT:    ret
@@ -179,8 +179,8 @@ define <4 x float> @multiple_muls_shuffle_external_with_loads(ptr %ptr_a, ptr %p
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    st2 { v2.2s, v3.2s }, [x5]
 ; CHECK-NEXT:    ldr q1, [x3]
-; CHECK-NEXT:    fcmla v0.4s, v1.4s, v5.4s, #0
-; CHECK-NEXT:    fcmla v0.4s, v1.4s, v5.4s, #90
+; CHECK-NEXT:    fcmla v0.4s, v5.4s, v1.4s, #0
+; CHECK-NEXT:    fcmla v0.4s, v5.4s, v1.4s, #90
 ; CHECK-NEXT:    ret
 entry:
   %a = load <4 x float>, ptr %ptr_a
@@ -300,31 +300,31 @@ define void @mul_add_common_mul_add_mul(<4 x double> %a, <4 x double> %b, <4 x d
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v16.2d, #0000000000000000
 ; CHECK-NEXT:    movi v17.2d, #0000000000000000
+; CHECK-NEXT:    ldr q19, [sp, #112]
+; CHECK-NEXT:    ldp q18, q20, [sp, #80]
+; CHECK-NEXT:    ldr q21, [sp, #64]
 ; CHECK-NEXT:    movi v22.2d, #0000000000000000
-; CHECK-NEXT:    ldp q21, q18, [sp, #96]
-; CHECK-NEXT:    ldp q20, q19, [sp, #64]
-; CHECK-NEXT:    fcmla v22.2d, v3.2d, v1.2d, #0
 ; CHECK-NEXT:    fcmla v16.2d, v18.2d, v19.2d, #0
 ; CHECK-NEXT:    fcmla v17.2d, v21.2d, v20.2d, #0
-; CHECK-NEXT:    fcmla v22.2d, v3.2d, v1.2d, #90
-; CHECK-NEXT:    ldr q1, [sp, #48]
-; CHECK-NEXT:    ldr q3, [sp]
+; CHECK-NEXT:    fcmla v22.2d, v1.2d, v3.2d, #0
 ; CHECK-NEXT:    fcmla v16.2d, v18.2d, v19.2d, #90
 ; CHECK-NEXT:    movi v18.2d, #0000000000000000
 ; CHECK-NEXT:    fcmla v17.2d, v21.2d, v20.2d, #90
-; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #0
-; CHECK-NEXT:    fcmla v18.2d, v2.2d, v0.2d, #0
-; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #0
-; CHECK-NEXT:    fcmla v16.2d, v7.2d, v5.2d, #90
-; CHECK-NEXT:    fcmla v18.2d, v2.2d, v0.2d, #90
-; CHECK-NEXT:    fcmla v17.2d, v6.2d, v4.2d, #90
-; CHECK-NEXT:    ldp q0, q2, [sp, #16]
+; CHECK-NEXT:    fcmla v22.2d, v1.2d, v3.2d, #90
+; CHECK-NEXT:    fcmla v16.2d, v5.2d, v7.2d, #0
+; CHECK-NEXT:    fcmla v18.2d, v0.2d, v2.2d, #0
+; CHECK-NEXT:    fcmla v17.2d, v4.2d, v6.2d, #0
+; CHECK-NEXT:    fcmla v16.2d, v5.2d, v7.2d, #90
+; CHECK-NEXT:    fcmla v18.2d, v0.2d, v2.2d, #90
+; CHECK-NEXT:    fcmla v17.2d, v4.2d, v6.2d, #90
+; CHECK-NEXT:    ldp q3, q0, [sp, #32]
+; CHECK-NEXT:    ldp q2, q1, [sp]
 ; CHECK-NEXT:    fsub v4.2d, v22.2d, v16.2d
-; CHECK-NEXT:    fcmla v16.2d, v0.2d, v1.2d, #0
 ; CHECK-NEXT:    fsub v5.2d, v18.2d, v17.2d
+; CHECK-NEXT:    fcmla v16.2d, v0.2d, v1.2d, #0
 ; CHECK-NEXT:    fcmla v17.2d, v3.2d, v2.2d, #0
-; CHECK-NEXT:    fcmla v16.2d, v0.2d, v1.2d, #90
 ; CHECK-NEXT:    stp q5, q4, [x0]
+; CHECK-NEXT:    fcmla v16.2d, v0.2d, v1.2d, #90
 ; CHECK-NEXT:    fcmla v17.2d, v3.2d, v2.2d, #90
 ; CHECK-NEXT:    stp q17, q16, [x1]
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
index d245c0a0e4823d0..40fd7a392c83b96 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll
@@ -25,10 +25,10 @@ define dso_local %"struct.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldp q3, q2, [x9]
 ; CHECK-NEXT:    cmp x8, #1600
 ; CHECK-NEXT:    ldp q5, q4, [x10]
-; CHECK-NEXT:    fcmla v0.2d, v3.2d, v5.2d, #0
-; CHECK-NEXT:    fcmla v1.2d, v2.2d, v4.2d, #0
-; CHECK-NEXT:    fcmla v0.2d, v3.2d, v5.2d, #90
-; CHECK-NEXT:    fcmla v1.2d, v2.2d, v4.2d, #90
+; CHECK-NEXT:    fcmla v0.2d, v5.2d, v3.2d, #0
+; CHECK-NEXT:    fcmla v1.2d, v4.2d, v2.2d, #0
+; CHECK-NEXT:    fcmla v0.2d, v5.2d, v3.2d, #90
+; CHECK-NEXT:    fcmla v1.2d, v4.2d, v2.2d, #90
 ; CHECK-NEXT:    b.ne .LBB0_1
 ; CHECK-NEXT:  // %bb.2: // %middle.block
 ; CHECK-NEXT:    zip2 v2.2d, v0.2d, v1.2d
@@ -92,10 +92,10 @@ define %"struct.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldp q3, q2, [x9]
 ; CHECK-NEXT:    cmp x8, #1600
 ; CHECK-NEXT:    ldp q5, q4, [x10]
-; CHECK-NEXT:    fcmla v1.2d, v3.2d, v5.2d, #0
-; CHECK-NEXT:    fcmla v0.2d, v2.2d, v4.2d, #0
-; CHECK-NEXT:    fcmla v1.2d, v3.2d, v5.2d, #90
-; CHECK-NEXT:    fcmla v0.2d, v2.2d, v4.2d, #90
+; CHECK-NEXT:    fcmla v1.2d, v5.2d, v3.2d, #0
+; CHECK-NEXT:    fcmla v0.2d, v4.2d, v2.2d, #0
+; CHECK-NEXT:    fcmla v1.2d, v5.2d, v3.2d, #90
+; CHECK-NEXT:    fcmla v0.2d, v4.2d, v2.2d, #90
 ; CHECK-NEXT:    b.ne .LBB1_1
 ; CHECK-NEXT:  // %bb.2: // %middle.block
 ; CHECK-NEXT:    zip2 v2.2d, v1.2d, v0.2d
@@ -159,14 +159,14 @@ define %"struct.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldp q7, q6, [x10]
 ; CHECK-NEXT:    ldp q17, q16, [x9, #32]
 ; CHECK-NEXT:    ldp q19, q18, [x10, #32]
-; CHECK-NEXT:    fcmla v1.2d, v5.2d, v7.2d, #0
-; CHECK-NEXT:    fcmla v0.2d, v4.2d, v6.2d, #0
-; CHECK-NEXT:    fcmla v2.2d, v17.2d, v19.2d, #0
-; CHECK-NEXT:    fcmla v3.2d, v16.2d, v18.2d, #0
-; CHECK-NEXT:    fcmla v1.2d, v5.2d, v7.2d, #90
-; CHECK-NEXT:    fcmla v0.2d, v4.2d, v6.2d, #90
-; CHECK-NEXT:    fcmla v2.2d, v17.2d, v19.2d, #90
-; CHECK-NEXT:    fcmla v3.2d, v16.2d, v18.2d, #90
+; CHECK-NEXT:    fcmla v1.2d, v7.2d, v5.2d, #0
+; CHECK-NEXT:    fcmla v0.2d, v6.2d, v4.2d, #0
+; CHECK-NEXT:    fcmla v2.2d, v19.2d, v17.2d, #0
+; CHECK-NEXT:    fcmla v3.2d, v18.2d, v16.2d, #0
+; CHECK-NEXT:    fcmla v1.2d, v7.2d, v5.2d, #90
+; CHECK-NEXT:    fcmla v0.2d, v6.2d, v4.2d, #90
+; CHECK-NEXT:    fcmla v2.2d, v19.2d, v17.2d, #90
+; CHECK-NEXT:    fcmla v3.2d, v18.2d, v16.2d, #90
 ; CHECK-NEXT:    b.ne .LBB2_1
 ; CHECK-NEXT:  // %bb.2: // %middle.block
 ; CHECK-NEXT:    zip2 v4.2d, v2.2d, v3.2d
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll
index 0dbc2ecc8b008d7..8de2ac5a140c681 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll
@@ -13,16 +13,16 @@ define <4 x double> @complex_mul_const(<4 x double> %a, <4 x double> %b) {
 ; CHECK-NEXT:    movi v5.2d, #0000000000000000
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
 ; CHECK-NEXT:    movi v4.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v6.2d, v3.2d, v1.2d, #0
-; CHECK-NEXT:    fcmla v5.2d, v2.2d, v0.2d, #0
-; CHECK-NEXT:    fcmla v6.2d, v3.2d, v1.2d, #90
+; CHECK-NEXT:    fcmla v6.2d, v1.2d, v3.2d, #0
+; CHECK-NEXT:    fcmla v5.2d, v0.2d, v2.2d, #0
+; CHECK-NEXT:    fcmla v6.2d, v1.2d, v3.2d, #90
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT:    fcmla v5.2d, v2.2d, v0.2d, #90
+; CHECK-NEXT:    fcmla v5.2d, v0.2d, v2.2d, #90
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v4.2d, v1.2d, v6.2d, #0
-; CHECK-NEXT:    fcmla v0.2d, v1.2d, v5.2d, #0
-; CHECK-NEXT:    fcmla v4.2d, v1.2d, v6.2d, #90
-; CHECK-NEXT:    fcmla v0.2d, v1.2d, v5.2d, #90
+; CHECK-NEXT:    fcmla v4.2d, v6.2d, v1.2d, #0
+; CHECK-NEXT:    fcmla v0.2d, v5.2d, v1.2d, #0
+; CHECK-NEXT:    fcmla v4.2d, v6.2d, v1.2d, #90
+; CHECK-NEXT:    fcmla v0.2d, v5.2d, v1.2d, #90
 ; CHECK-NEXT:    mov v1.16b, v4.16b
 ; CHECK-NEXT:    ret
 entry:
@@ -58,15 +58,15 @@ define <4 x double> @complex_mul_non_const(<4 x double> %a, <4 x double> %b, [2
 ; CHECK-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-NEXT:    mov v4.d[1], v5.d[0]
 ; CHECK-NEXT:    movi v5.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v7.2d, v3.2d, v1.2d, #0
-; CHECK-NEXT:    fcmla v6.2d, v2.2d, v0.2d, #0
-; CHECK-NEXT:    fcmla v7.2d, v3.2d, v1.2d, #90
-; CHECK-NEXT:    fcmla v6.2d, v2.2d, v0.2d, #90
+; CHECK-NEXT:    fcmla v7.2d, v1.2d, v3.2d, #0
+; CHECK-NEXT:    fcmla v6.2d, v0.2d, v2.2d, #0
+; CHECK-NEXT:    fcmla v7.2d, v1.2d, v3.2d, #90
+; CHECK-NEXT:    fcmla v6.2d, v0.2d, v2.2d, #90
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v5.2d, v4.2d, v7.2d, #0
-; CHECK-NEXT:    fcmla v0.2d, v4.2d, v6.2d, #0
-; CHECK-NEXT:    fcmla v5.2d, v4.2d, v7.2d, #90
-; CHECK-NEXT:    fcmla v0.2d, v4.2d, v6.2d, #90
+; CHECK-NEXT:    fcmla v5.2d, v7.2d, v4.2d, #0
+; CHECK-NEXT:    fcmla v0.2d, v6.2d, v4.2d, #0
+; CHECK-NEXT:    fcmla v5.2d, v7.2d, v4.2d, #90
+; CHECK-NEXT:    fcmla v0.2d, v6.2d, v4.2d, #90
 ; CHECK-NEXT:    mov v1.16b, v5.16b
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
index 81a8631a1691b56..2cbc8ed3192de48 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
@@ -8,8 +8,8 @@ define <4 x float> @simple_mul(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: simple_mul:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v2.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v2.4s, v0.4s, v1.4s, #0
-; CHECK-NEXT:    fcmla v2.4s, v0.4s, v1.4s, #90
+; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #0
+; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #90
 ; CHECK-NEXT:    mov v0.16b, v2.16b
 ; CHECK-NEXT:    ret
 entry:
@@ -65,10 +65,10 @@ define <4 x float> @three_way_mul(<4 x float> %a, <4 x float> %b, <4 x float> %c
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v4.2d, #0000000000000000
 ; CHECK-NEXT:    movi v3.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v4.4s, v1.4s, v0.4s, #0
-; CHECK-NEXT:    fcmla v4.4s, v1.4s, v0.4s, #90
-; CHECK-NEXT:    fcmla v3.4s, v2.4s, v4.4s, #0
-; CHECK-NEXT:    fcmla v3.4s, v2.4s, v4.4s, #90
+; CHECK-NEXT:    fcmla v4.4s, v0.4s, v1.4s, #0
+; CHECK-NEXT:    fcmla v4.4s, v0.4s, v1.4s, #90
+; CHECK-NEXT:    fcmla v3.4s, v4.4s, v2.4s, #0
+; CHECK-NEXT:    fcmla v3.4s, v4.4s, v2.4s, #90
 ; CHECK-NEXT:    mov v0.16b, v3.16b
 ; CHECK-NEXT:    ret
 entry:
@@ -177,8 +177,8 @@ define <4 x float> @mul_mul_with_fneg(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: mul_mul_with_fneg:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v2.2d, #0000000000000000
-; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #270
-; CHECK-NEXT:    fcmla v2.4s, v1.4s, v0.4s, #180
+; CHECK-NEXT:    fcmla v2.4s, v0.4s, v1.4s, #270
+; CHECK-NEXT:    fcmla v2.4s, v0.4s, v1.4s, #180
 ; CHECK-NEXT:    mov v0.16b, v2.16b
 ; CHECK-NEXT:    ret
 entry:



More information about the cfe-commits mailing list