[llvm] bc104fd - [PowerPC] Relax register superclasses for paired memops

Qiu Chaofan via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 11 00:06:07 PDT 2021


Author: Qiu Chaofan
Date: 2021-06-11T14:54:03+08:00
New Revision: bc104fdcecc0da1650177f3587ffe233b37f071b

URL: https://github.com/llvm/llvm-project/commit/bc104fdcecc0da1650177f3587ffe233b37f071b
DIFF: https://github.com/llvm/llvm-project/commit/bc104fdcecc0da1650177f3587ffe233b37f071b.diff

LOG: [PowerPC] Relax register superclasses for paired memops

Relaxing the superclass constraint for VSX register classes helps reduce
32-byte spills and copies when register pressure is high.

Some of the affected test cases now contain more copies due to the new
allocation order. However, this patch should not be the root cause, and
we may be able to fix it elsewhere in register allocation.

Reviewed By: nemanjai

Differential Revision: https://reviews.llvm.org/D104006

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
    llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
    llvm/test/CodeGen/PowerPC/constant-pool.ll
    llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
    llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
    llvm/test/CodeGen/PowerPC/mma-outer-product.ll
    llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
    llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll
    llvm/test/CodeGen/PowerPC/remove-redundant-moves.ll
    llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
    llvm/test/CodeGen/PowerPC/vsx.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index b27728ae34258..260c5f3897c01 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -514,6 +514,8 @@ const TargetRegisterClass *
 PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                            const MachineFunction &MF) const {
   const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+  const auto *DefaultSuperclass =
+      TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
   if (Subtarget.hasVSX()) {
     // With VSX, we can inflate various sub-register classes to the full VSX
     // register set.
@@ -530,15 +532,27 @@ PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
       if (RC == &PPC::GPRCRegClass && EnableGPRToVecSpills)
         InflateGPRC++;
     }
-    if (RC == &PPC::F8RCRegClass)
-      return &PPC::VSFRCRegClass;
-    else if (RC == &PPC::VRRCRegClass)
-      return &PPC::VSRCRegClass;
-    else if (RC == &PPC::F4RCRegClass && Subtarget.hasP8Vector())
-      return &PPC::VSSRCRegClass;
+
+    for (const auto *I = RC->getSuperClasses(); *I; ++I) {
+      if (getRegSizeInBits(**I) != getRegSizeInBits(*RC))
+        continue;
+
+      switch ((*I)->getID()) {
+      case PPC::VSSRCRegClassID:
+        return Subtarget.hasP8Vector() ? *I : DefaultSuperclass;
+      case PPC::VSFRCRegClassID:
+      case PPC::VSRCRegClassID:
+        return *I;
+      case PPC::VSRpRCRegClassID:
+        return Subtarget.pairedVectorMemops() ? *I : DefaultSuperclass;
+      case PPC::ACCRCRegClassID:
+      case PPC::UACCRCRegClassID:
+        return Subtarget.hasMMA() ? *I : DefaultSuperclass;
+      }
+    }
   }
 
-  return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
+  return DefaultSuperclass;
 }
 
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
index 76d76ecae7312..14837771762d8 100644
--- a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll
@@ -1560,14 +1560,12 @@ declare <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32>, <2 x i64>, i32)
 define <2 x i64> @intrinsicExtractTest(<2 x i64> %a) {
 ; CHECK-64-LABEL: intrinsicExtractTest:
 ; CHECK-64:       # %bb.0: # %entry
-; CHECK-64-NEXT:    xxextractuw 0, 34, 5
-; CHECK-64-NEXT:    xxlor 34, 0, 0
+; CHECK-64-NEXT:    xxextractuw 34, 34, 5
 ; CHECK-64-NEXT:    blr
 ;
 ; CHECK-32-LABEL: intrinsicExtractTest:
 ; CHECK-32:       # %bb.0: # %entry
-; CHECK-32-NEXT:    xxextractuw 0, 34, 5
-; CHECK-32-NEXT:    xxlor 34, 0, 0
+; CHECK-32-NEXT:    xxextractuw 34, 34, 5
 ; CHECK-32-NEXT:    blr
 entry:
   %ans = tail call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> %a, i32 5)

diff  --git a/llvm/test/CodeGen/PowerPC/constant-pool.ll b/llvm/test/CodeGen/PowerPC/constant-pool.ll
index 69c2582f40053..d8a833c4be1ee 100644
--- a/llvm/test/CodeGen/PowerPC/constant-pool.ll
+++ b/llvm/test/CodeGen/PowerPC/constant-pool.ll
@@ -360,25 +360,28 @@ define ppc_fp128 @three_constants_ppcf128(ppc_fp128 %a, ppc_fp128 %c) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r0, 16(r1)
-; CHECK-NEXT:    stdu r1, -32(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-DAG:     xxlxor f4, f4, f4
-; CHECK-DAG:     xxsplti32dx vs3, 0, 1074935889
+; CHECK-NEXT:    .cfi_offset v31, -16
+; CHECK-NEXT:    stxv vs63, 32(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    xxsplti32dx vs63, 0, 1074935889
+; CHECK-NEXT:    xxlxor f4, f4, f4
+; CHECK-NEXT:    xxlor vs3, vs63, vs63
 ; CHECK-NEXT:    xxsplti32dx vs3, 1, -343597384
 ; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
-; CHECK-DAG:     xxlxor f4, f4, f4
-; CHECK-DAG:     xxsplti32dx vs3, 0, 1074935889
+; CHECK-NEXT:    xxlor vs3, vs63, vs63
+; CHECK-NEXT:    xxlxor f4, f4, f4
 ; CHECK-NEXT:    xxsplti32dx vs3, 1, -1719329096
 ; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
-; CHECK-DAG:     xxlxor f4, f4, f4
-; CHECK-DAG:     xxsplti32dx vs3, 0, 1074935889
-; CHECK-NEXT:    xxsplti32dx vs3, 1, 8724152
-; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
+; CHECK-NEXT:    xxsplti32dx vs63, 1, 8724152
+; CHECK-NEXT:    xxlxor f4, f4, f4
+; CHECK-NEXT:    xscpsgndp f3, vs63, vs63
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
-; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    lxv vs63, 32(r1) # 16-byte Folded Reload
+; CHECK-NEXT:    addi r1, r1, 48
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr

diff  --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
index a03e9f96c9f3d..1285b6b61a3d5 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
@@ -11,8 +11,7 @@ declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <
 declare void @foo()
 define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4, i8* %ptr) {
 ; CHECK-LABEL: intrinsics1:
-; CHECK:         .localentry intrinsics1, 1
-; CHECK-NEXT:  # %bb.0:
+; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    .cfi_def_cfa_offset 176
 ; CHECK-NEXT:    .cfi_offset lr, 16
@@ -46,11 +45,11 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
 ; CHECK-NEXT:    li r3, 32
 ; CHECK-NEXT:    lxvp vsp2, r1(r3)
 ; CHECK-NEXT:    li r3, 128
-; CHECK-NEXT:    lxvp vsp34, r1(r3) # 32-byte Folded Reload
+; CHECK-NEXT:    lxvp vsp4, r1(r3) # 32-byte Folded Reload
 ; CHECK-NEXT:    li r3, 96
-; CHECK-NEXT:    lxvp vsp36, r1(r3) # 32-byte Folded Reload
+; CHECK-NEXT:    lxvp vsp6, r1(r3) # 32-byte Folded Reload
 ; CHECK-NEXT:    xxmtacc acc0
-; CHECK-NEXT:    xvf16ger2pp acc0, v2, v4
+; CHECK-NEXT:    xvf16ger2pp acc0, vs4, vs6
 ; CHECK-NEXT:    xxmfacc acc0
 ; CHECK-NEXT:    stxv vs0, 48(r30)
 ; CHECK-NEXT:    stxv vs1, 32(r30)
@@ -98,11 +97,11 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
 ; CHECK-BE-NEXT:    li r3, 144
 ; CHECK-BE-NEXT:    lxvp vsp2, r1(r3)
 ; CHECK-BE-NEXT:    li r3, 208
-; CHECK-BE-NEXT:    lxvp vsp34, r1(r3) # 32-byte Folded Reload
+; CHECK-BE-NEXT:    lxvp vsp4, r1(r3) # 32-byte Folded Reload
 ; CHECK-BE-NEXT:    li r3, 176
-; CHECK-BE-NEXT:    lxvp vsp36, r1(r3) # 32-byte Folded Reload
+; CHECK-BE-NEXT:    lxvp vsp6, r1(r3) # 32-byte Folded Reload
 ; CHECK-BE-NEXT:    xxmtacc acc0
-; CHECK-BE-NEXT:    xvf16ger2pp acc0, v2, v4
+; CHECK-BE-NEXT:    xvf16ger2pp acc0, vs4, vs6
 ; CHECK-BE-NEXT:    xxmfacc acc0
 ; CHECK-BE-NEXT:    stxv vs1, 16(r30)
 ; CHECK-BE-NEXT:    stxvx vs0, 0, r30

diff  --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
index 82e86c5761ef0..0207d121369df 100644
--- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
@@ -11,11 +11,16 @@ declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <
 define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) {
 ; CHECK-LABEL: ass_acc:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmr v3, v2
-; CHECK-NEXT:    xxlor vs0, v2, v2
-; CHECK-NEXT:    xxlor vs1, v3, v3
-; CHECK-NEXT:    xxlor vs2, v2, v2
-; CHECK-NEXT:    xxlor vs3, v3, v3
+; CHECK-NEXT:    xxlor vs1, v2, v2
+; CHECK-NEXT:    xxlor vs0, vs1, vs1
+; CHECK-NEXT:    xxlor vs4, vs0, vs0
+; CHECK-NEXT:    xxlor vs5, vs1, vs1
+; CHECK-NEXT:    xxlor vs6, vs0, vs0
+; CHECK-NEXT:    xxlor vs7, vs1, vs1
+; CHECK-NEXT:    xxlor vs0, vs4, vs4
+; CHECK-NEXT:    xxlor vs1, vs5, vs5
+; CHECK-NEXT:    xxlor vs2, vs6, vs6
+; CHECK-NEXT:    xxlor vs3, vs7, vs7
 ; CHECK-NEXT:    stxv vs0, 48(r3)
 ; CHECK-NEXT:    stxv vs1, 32(r3)
 ; CHECK-NEXT:    stxv vs2, 16(r3)
@@ -24,11 +29,16 @@ define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) {
 ;
 ; CHECK-BE-LABEL: ass_acc:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    vmr v3, v2
-; CHECK-BE-NEXT:    xxlor vs0, v2, v2
-; CHECK-BE-NEXT:    xxlor vs1, v3, v3
-; CHECK-BE-NEXT:    xxlor vs2, v2, v2
-; CHECK-BE-NEXT:    xxlor vs3, v3, v3
+; CHECK-BE-NEXT:    xxlor vs1, v2, v2
+; CHECK-BE-NEXT:    xxlor vs0, vs1, vs1
+; CHECK-BE-NEXT:    xxlor vs4, vs0, vs0
+; CHECK-BE-NEXT:    xxlor vs5, vs1, vs1
+; CHECK-BE-NEXT:    xxlor vs6, vs0, vs0
+; CHECK-BE-NEXT:    xxlor vs7, vs1, vs1
+; CHECK-BE-NEXT:    xxlor vs0, vs4, vs4
+; CHECK-BE-NEXT:    xxlor vs1, vs5, vs5
+; CHECK-BE-NEXT:    xxlor vs2, vs6, vs6
+; CHECK-BE-NEXT:    xxlor vs3, vs7, vs7
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    stxv vs3, 48(r3)
@@ -45,11 +55,16 @@ declare <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1>)
 define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
 ; CHECK-LABEL: int_xxmtacc:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmr v3, v2
-; CHECK-NEXT:    xxlor vs0, v2, v2
-; CHECK-NEXT:    xxlor vs1, v3, v3
-; CHECK-NEXT:    xxlor vs2, v2, v2
-; CHECK-NEXT:    xxlor vs3, v3, v3
+; CHECK-NEXT:    xxlor vs1, v2, v2
+; CHECK-NEXT:    xxlor vs0, vs1, vs1
+; CHECK-NEXT:    xxlor vs4, vs0, vs0
+; CHECK-NEXT:    xxlor vs5, vs1, vs1
+; CHECK-NEXT:    xxlor vs6, vs0, vs0
+; CHECK-NEXT:    xxlor vs7, vs1, vs1
+; CHECK-NEXT:    xxlor vs0, vs4, vs4
+; CHECK-NEXT:    xxlor vs1, vs5, vs5
+; CHECK-NEXT:    xxlor vs2, vs6, vs6
+; CHECK-NEXT:    xxlor vs3, vs7, vs7
 ; CHECK-NEXT:    xxmtacc acc0
 ; CHECK-NEXT:    stxv vs0, 48(r3)
 ; CHECK-NEXT:    stxv vs1, 32(r3)
@@ -59,11 +74,16 @@ define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
 ;
 ; CHECK-BE-LABEL: int_xxmtacc:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    vmr v3, v2
-; CHECK-BE-NEXT:    xxlor vs0, v2, v2
-; CHECK-BE-NEXT:    xxlor vs1, v3, v3
-; CHECK-BE-NEXT:    xxlor vs2, v2, v2
-; CHECK-BE-NEXT:    xxlor vs3, v3, v3
+; CHECK-BE-NEXT:    xxlor vs1, v2, v2
+; CHECK-BE-NEXT:    xxlor vs0, vs1, vs1
+; CHECK-BE-NEXT:    xxlor vs4, vs0, vs0
+; CHECK-BE-NEXT:    xxlor vs5, vs1, vs1
+; CHECK-BE-NEXT:    xxlor vs6, vs0, vs0
+; CHECK-BE-NEXT:    xxlor vs7, vs1, vs1
+; CHECK-BE-NEXT:    xxlor vs0, vs4, vs4
+; CHECK-BE-NEXT:    xxlor vs1, vs5, vs5
+; CHECK-BE-NEXT:    xxlor vs2, vs6, vs6
+; CHECK-BE-NEXT:    xxlor vs3, vs7, vs7
 ; CHECK-BE-NEXT:    xxmtacc acc0
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
@@ -84,11 +104,16 @@ declare <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1>)
 define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) {
 ; CHECK-LABEL: int_xxmfacc:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmr v3, v2
-; CHECK-NEXT:    xxlor vs0, v2, v2
-; CHECK-NEXT:    xxlor vs1, v3, v3
-; CHECK-NEXT:    xxlor vs2, v2, v2
-; CHECK-NEXT:    xxlor vs3, v3, v3
+; CHECK-NEXT:    xxlor vs1, v2, v2
+; CHECK-NEXT:    xxlor vs0, vs1, vs1
+; CHECK-NEXT:    xxlor vs4, vs0, vs0
+; CHECK-NEXT:    xxlor vs5, vs1, vs1
+; CHECK-NEXT:    xxlor vs6, vs0, vs0
+; CHECK-NEXT:    xxlor vs7, vs1, vs1
+; CHECK-NEXT:    xxlor vs0, vs4, vs4
+; CHECK-NEXT:    xxlor vs1, vs5, vs5
+; CHECK-NEXT:    xxlor vs2, vs6, vs6
+; CHECK-NEXT:    xxlor vs3, vs7, vs7
 ; CHECK-NEXT:    stxv vs0, 48(r3)
 ; CHECK-NEXT:    stxv vs1, 32(r3)
 ; CHECK-NEXT:    stxv vs2, 16(r3)
@@ -97,11 +122,16 @@ define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) {
 ;
 ; CHECK-BE-LABEL: int_xxmfacc:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    vmr v3, v2
-; CHECK-BE-NEXT:    xxlor vs0, v2, v2
-; CHECK-BE-NEXT:    xxlor vs1, v3, v3
-; CHECK-BE-NEXT:    xxlor vs2, v2, v2
-; CHECK-BE-NEXT:    xxlor vs3, v3, v3
+; CHECK-BE-NEXT:    xxlor vs1, v2, v2
+; CHECK-BE-NEXT:    xxlor vs0, vs1, vs1
+; CHECK-BE-NEXT:    xxlor vs4, vs0, vs0
+; CHECK-BE-NEXT:    xxlor vs5, vs1, vs1
+; CHECK-BE-NEXT:    xxlor vs6, vs0, vs0
+; CHECK-BE-NEXT:    xxlor vs7, vs1, vs1
+; CHECK-BE-NEXT:    xxlor vs0, vs4, vs4
+; CHECK-BE-NEXT:    xxlor vs1, vs5, vs5
+; CHECK-BE-NEXT:    xxlor vs2, vs6, vs6
+; CHECK-BE-NEXT:    xxlor vs3, vs7, vs7
 ; CHECK-BE-NEXT:    stxv vs1, 16(r3)
 ; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    stxv vs3, 48(r3)

diff  --git a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
index 9d245c768839b..856cb9d384c1a 100644
--- a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll
@@ -12,51 +12,51 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
 ; CHECK-LABEL: intrinsics1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18
-; CHECK-NEXT:    vmr v1, v4
+; CHECK-NEXT:    xxlor vs1, v4, v4
 ; CHECK-NEXT:    vmr v4, v3
 ; CHECK-NEXT:    ld r3, 96(r1)
-; CHECK-NEXT:    vmr v0, v2
-; CHECK-NEXT:    xxlor vs5, v2, v2
-; CHECK-NEXT:    xxlor vs4, v5, v5
-; CHECK-NEXT:    xxlor vs0, v0, v0
-; CHECK-NEXT:    xxlor vs1, v1, v1
-; CHECK-NEXT:    xxlor vs2, v4, v4
-; CHECK-NEXT:    xxlor vs3, v5, v5
-; CHECK-NEXT:    xxmtacc acc0
-; CHECK-NEXT:    xvi4ger8pp acc0, v2, v3
-; CHECK-NEXT:    xvf16ger2pp acc0, v2, v1
-; CHECK-NEXT:    pmxvf32gerpn acc0, v3, v5, 0, 0
-; CHECK-NEXT:    pmxvf64gernp acc0, vsp4, v0, 0, 0
-; CHECK-NEXT:    xxmfacc acc0
-; CHECK-NEXT:    stxv vs0, 48(r3)
-; CHECK-NEXT:    stxv vs1, 32(r3)
-; CHECK-NEXT:    stxv vs2, 16(r3)
-; CHECK-NEXT:    stxvx vs3, 0, r3
+; CHECK-NEXT:    xxlor vs0, v2, v2
+; CHECK-NEXT:    xxlor vs3, v2, v2
+; CHECK-NEXT:    xxlor vs2, v5, v5
+; CHECK-NEXT:    xxlor vs4, vs0, vs0
+; CHECK-NEXT:    xxlor vs5, vs1, vs1
+; CHECK-NEXT:    xxlor vs6, v4, v4
+; CHECK-NEXT:    xxlor vs7, v5, v5
+; CHECK-NEXT:    xxmtacc acc1
+; CHECK-NEXT:    xvi4ger8pp acc1, v2, v3
+; CHECK-NEXT:    xvf16ger2pp acc1, v2, vs1
+; CHECK-NEXT:    pmxvf32gerpn acc1, v3, v5, 0, 0
+; CHECK-NEXT:    pmxvf64gernp acc1, vsp2, vs0, 0, 0
+; CHECK-NEXT:    xxmfacc acc1
+; CHECK-NEXT:    stxv vs4, 48(r3)
+; CHECK-NEXT:    stxv vs5, 32(r3)
+; CHECK-NEXT:    stxv vs6, 16(r3)
+; CHECK-NEXT:    stxvx vs7, 0, r3
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: intrinsics1:
 ; CHECK-BE:       # %bb.0:
 ; CHECK-BE-NEXT:    # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18
-; CHECK-BE-NEXT:    vmr v1, v4
+; CHECK-BE-NEXT:    xxlor vs1, v4, v4
 ; CHECK-BE-NEXT:    vmr v4, v3
 ; CHECK-BE-NEXT:    ld r3, 112(r1)
-; CHECK-BE-NEXT:    vmr v0, v2
-; CHECK-BE-NEXT:    xxlor vs5, v2, v2
-; CHECK-BE-NEXT:    xxlor vs4, v5, v5
-; CHECK-BE-NEXT:    xxlor vs0, v0, v0
-; CHECK-BE-NEXT:    xxlor vs1, v1, v1
-; CHECK-BE-NEXT:    xxlor vs2, v4, v4
-; CHECK-BE-NEXT:    xxlor vs3, v5, v5
-; CHECK-BE-NEXT:    xxmtacc acc0
-; CHECK-BE-NEXT:    xvi4ger8pp acc0, v2, v3
-; CHECK-BE-NEXT:    xvf16ger2pp acc0, v2, v1
-; CHECK-BE-NEXT:    pmxvf32gerpn acc0, v3, v5, 0, 0
-; CHECK-BE-NEXT:    pmxvf64gernp acc0, vsp4, v0, 0, 0
-; CHECK-BE-NEXT:    xxmfacc acc0
-; CHECK-BE-NEXT:    stxv vs1, 16(r3)
-; CHECK-BE-NEXT:    stxvx vs0, 0, r3
-; CHECK-BE-NEXT:    stxv vs3, 48(r3)
-; CHECK-BE-NEXT:    stxv vs2, 32(r3)
+; CHECK-BE-NEXT:    xxlor vs0, v2, v2
+; CHECK-BE-NEXT:    xxlor vs3, v2, v2
+; CHECK-BE-NEXT:    xxlor vs2, v5, v5
+; CHECK-BE-NEXT:    xxlor vs4, vs0, vs0
+; CHECK-BE-NEXT:    xxlor vs5, vs1, vs1
+; CHECK-BE-NEXT:    xxlor vs6, v4, v4
+; CHECK-BE-NEXT:    xxlor vs7, v5, v5
+; CHECK-BE-NEXT:    xxmtacc acc1
+; CHECK-BE-NEXT:    xvi4ger8pp acc1, v2, v3
+; CHECK-BE-NEXT:    xvf16ger2pp acc1, v2, vs1
+; CHECK-BE-NEXT:    pmxvf32gerpn acc1, v3, v5, 0, 0
+; CHECK-BE-NEXT:    pmxvf64gernp acc1, vsp2, vs0, 0, 0
+; CHECK-BE-NEXT:    xxmfacc acc1
+; CHECK-BE-NEXT:    stxv vs5, 16(r3)
+; CHECK-BE-NEXT:    stxvx vs4, 0, r3
+; CHECK-BE-NEXT:    stxv vs7, 48(r3)
+; CHECK-BE-NEXT:    stxv vs6, 32(r3)
 ; CHECK-BE-NEXT:    blr
   %1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc3, <16 x i8> %vc2, <16 x i8> %vc4)
   %2 = tail call <512 x i1> @llvm.ppc.mma.xvi4ger8pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc2)

diff  --git a/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
index efec3c170f8b0..4af9d46ffacdd 100644
--- a/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
+++ b/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll
@@ -1057,7 +1057,7 @@ declare <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32>, <2 x i64>, i32)
 define <2 x i64> @intrinsicExtractTest(<2 x i64> %a) {
 entry:
 ; CHECK-LABEL: intrinsicExtractTest
-; CHECK: xxextractuw 0, 34, 5
+; CHECK: xxextractuw 34, 34, 5
 ; CHECK: blr
   %ans = tail call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> %a, i32 5)
   ret <2 x i64> %ans

diff  --git a/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll b/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll
index f488820123235..c92eed92d0531 100644
--- a/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/paired-vector-intrinsics.ll
@@ -20,30 +20,34 @@ declare <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8>, <16 x i8>)
 define void @ass_pair(<256 x i1>* %ptr, <16 x i8> %vc) {
 ; CHECK-LABEL: ass_pair:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmr v3, v2
-; CHECK-NEXT:    stxv v2, 16(r3)
-; CHECK-NEXT:    stxv v3, 0(r3)
+; CHECK-NEXT:    xxlor vs1, v2, v2
+; CHECK-NEXT:    xxlor vs0, vs1, vs1
+; CHECK-NEXT:    stxv vs1, 16(r3)
+; CHECK-NEXT:    stxv vs1, 0(r3)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-NOMMA-LABEL: ass_pair:
 ; CHECK-NOMMA:       # %bb.0: # %entry
-; CHECK-NOMMA-NEXT:    vmr v3, v2
-; CHECK-NOMMA-NEXT:    stxv v2, 16(r3)
-; CHECK-NOMMA-NEXT:    stxv v3, 0(r3)
+; CHECK-NOMMA-NEXT:    xxlor vs1, v2, v2
+; CHECK-NOMMA-NEXT:    xxlor vs0, vs1, vs1
+; CHECK-NOMMA-NEXT:    stxv vs1, 16(r3)
+; CHECK-NOMMA-NEXT:    stxv vs1, 0(r3)
 ; CHECK-NOMMA-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: ass_pair:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    vmr v3, v2
-; CHECK-BE-NEXT:    stxv v2, 16(r3)
-; CHECK-BE-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NEXT:    xxlor vs1, v2, v2
+; CHECK-BE-NEXT:    xxlor vs0, vs1, vs1
+; CHECK-BE-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NEXT:    blr
 ;
 ; CHECK-BE-NOMMA-LABEL: ass_pair:
 ; CHECK-BE-NOMMA:       # %bb.0: # %entry
-; CHECK-BE-NOMMA-NEXT:    vmr v3, v2
-; CHECK-BE-NOMMA-NEXT:    stxv v2, 16(r3)
-; CHECK-BE-NOMMA-NEXT:    stxv v2, 0(r3)
+; CHECK-BE-NOMMA-NEXT:    xxlor vs1, v2, v2
+; CHECK-BE-NOMMA-NEXT:    xxlor vs0, vs1, vs1
+; CHECK-BE-NOMMA-NEXT:    stxv vs1, 16(r3)
+; CHECK-BE-NOMMA-NEXT:    stxv vs0, 0(r3)
 ; CHECK-BE-NOMMA-NEXT:    blr
 entry:
   %0 = tail call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> %vc, <16 x i8> %vc)

diff  --git a/llvm/test/CodeGen/PowerPC/remove-redundant-moves.ll b/llvm/test/CodeGen/PowerPC/remove-redundant-moves.ll
index 40e39c64deadb..942e0be33b39e 100644
--- a/llvm/test/CodeGen/PowerPC/remove-redundant-moves.ll
+++ b/llvm/test/CodeGen/PowerPC/remove-redundant-moves.ll
@@ -8,8 +8,7 @@ entry:
 ; CHECK: xxswapd [[SW:[0-9]+]], 34
 ; CHECK: xscvsxddp 1, [[SW]]
 ; CHECK-BE-LABEL: test1
-; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
-; CHECK-BE: xscvsxddp 1, [[CP]]
+; CHECK-BE: xscvsxddp 1, 34
   %0 = extractelement <2 x i64> %a, i32 0
   %1 = sitofp i64 %0 to double
   ret double %1
@@ -18,8 +17,7 @@ entry:
 define double @test2(<2 x i64> %a) {
 entry:
 ; CHECK-LABEL: test2
-; CHECK: xxlor [[CP:[0-9]+]], 34, 34
-; CHECK: xscvsxddp 1, [[CP]]
+; CHECK: xscvsxddp 1, 34
 ; CHECK-BE-LABEL: test2
 ; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
 ; CHECK-BE: xscvsxddp 1, [[SW]]
@@ -34,8 +32,7 @@ entry:
 ; CHECK: xxswapd [[SW:[0-9]+]], 34
 ; CHECK: xscvsxdsp 1, [[SW]]
 ; CHECK-BE-LABEL: test1f
-; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
-; CHECK-BE: xscvsxdsp 1, [[CP]]
+; CHECK-BE: xscvsxdsp 1, 34
   %0 = extractelement <2 x i64> %a, i32 0
   %1 = sitofp i64 %0 to float
   ret float %1
@@ -44,8 +41,7 @@ entry:
 define float @test2f(<2 x i64> %a) {
 entry:
 ; CHECK-LABEL: test2f
-; CHECK: xxlor [[CP:[0-9]+]], 34, 34
-; CHECK: xscvsxdsp 1, [[CP]]
+; CHECK: xscvsxdsp 1, 34
 ; CHECK-BE-LABEL: test2f
 ; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
 ; CHECK-BE: xscvsxdsp 1, [[SW]]
@@ -60,8 +56,7 @@ entry:
 ; CHECK: xxswapd [[SW:[0-9]+]], 34
 ; CHECK: xscvuxddp 1, [[SW]]
 ; CHECK-BE-LABEL: test1u
-; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
-; CHECK-BE: xscvuxddp 1, [[CP]]
+; CHECK-BE: xscvuxddp 1, 34
   %0 = extractelement <2 x i64> %a, i32 0
   %1 = uitofp i64 %0 to double
   ret double %1
@@ -70,8 +65,7 @@ entry:
 define double @test2u(<2 x i64> %a) {
 entry:
 ; CHECK-LABEL: test2u
-; CHECK: xxlor [[CP:[0-9]+]], 34, 34
-; CHECK: xscvuxddp 1, [[CP]]
+; CHECK: xscvuxddp 1, 34
 ; CHECK-BE-LABEL: test2u
 ; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
 ; CHECK-BE: xscvuxddp 1, [[SW]]
@@ -86,8 +80,7 @@ entry:
 ; CHECK: xxswapd [[SW:[0-9]+]], 34
 ; CHECK: xscvuxdsp 1, [[SW]]
 ; CHECK-BE-LABEL: test1fu
-; CHECK-BE: xxlor [[CP:[0-9]+]], 34, 34
-; CHECK-BE: xscvuxdsp 1, [[CP]]
+; CHECK-BE: xscvuxdsp 1, 34
   %0 = extractelement <2 x i64> %a, i32 0
   %1 = uitofp i64 %0 to float
   ret float %1
@@ -96,8 +89,7 @@ entry:
 define float @test2fu(<2 x i64> %a) {
 entry:
 ; CHECK-LABEL: test2fu
-; CHECK: xxlor [[CP:[0-9]+]], 34, 34
-; CHECK: xscvuxdsp 1, [[CP]]
+; CHECK: xscvuxdsp 1, 34
 ; CHECK-BE-LABEL: test2fu
 ; CHECK-BE: xxswapd [[SW:[0-9]+]], 34
 ; CHECK-BE: xscvuxdsp 1, [[SW]]

diff  --git a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
index 6a490737c710f..8a704f6b735e0 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll
@@ -13,8 +13,7 @@ define i64 @test2elt(<2 x i64> %a) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    xxlor vs1, v2, v2
-; CHECK-P8-NEXT:    xscvuxdsp f1, f1
+; CHECK-P8-NEXT:    xscvuxdsp f1, v2
 ; CHECK-P8-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P8-NEXT:    xscvdpspn v3, f1
 ; CHECK-P8-NEXT:    xscvdpspn v2, f0
@@ -28,8 +27,7 @@ define i64 @test2elt(<2 x i64> %a) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
 ; CHECK-P9-NEXT:    xscvuxdsp f0, f0
 ; CHECK-P9-NEXT:    xscvdpspn v3, f0
-; CHECK-P9-NEXT:    xxlor vs0, v2, v2
-; CHECK-P9-NEXT:    xscvuxdsp f0, f0
+; CHECK-P9-NEXT:    xscvuxdsp f0, v2
 ; CHECK-P9-NEXT:    xscvdpspn v2, f0
 ; CHECK-P9-NEXT:    vmrghw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
@@ -38,8 +36,7 @@ define i64 @test2elt(<2 x i64> %a) local_unnamed_addr #0 {
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    xxlor vs1, v2, v2
-; CHECK-BE-NEXT:    xscvuxdsp f1, f1
+; CHECK-BE-NEXT:    xscvuxdsp f1, v2
 ; CHECK-BE-NEXT:    xscvuxdsp f0, f0
 ; CHECK-BE-NEXT:    xscvdpspn v2, f1
 ; CHECK-BE-NEXT:    xscvdpspn v3, f0
@@ -302,8 +299,7 @@ define i64 @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 {
 ; CHECK-P8-LABEL: test2elt_signed:
 ; CHECK-P8:       # %bb.0: # %entry
 ; CHECK-P8-NEXT:    xxswapd vs0, v2
-; CHECK-P8-NEXT:    xxlor vs1, v2, v2
-; CHECK-P8-NEXT:    xscvsxdsp f1, f1
+; CHECK-P8-NEXT:    xscvsxdsp f1, v2
 ; CHECK-P8-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P8-NEXT:    xscvdpspn v3, f1
 ; CHECK-P8-NEXT:    xscvdpspn v2, f0
@@ -317,8 +313,7 @@ define i64 @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 {
 ; CHECK-P9-NEXT:    xxswapd vs0, v2
 ; CHECK-P9-NEXT:    xscvsxdsp f0, f0
 ; CHECK-P9-NEXT:    xscvdpspn v3, f0
-; CHECK-P9-NEXT:    xxlor vs0, v2, v2
-; CHECK-P9-NEXT:    xscvsxdsp f0, f0
+; CHECK-P9-NEXT:    xscvsxdsp f0, v2
 ; CHECK-P9-NEXT:    xscvdpspn v2, f0
 ; CHECK-P9-NEXT:    vmrghw v2, v2, v3
 ; CHECK-P9-NEXT:    mfvsrld r3, v2
@@ -327,8 +322,7 @@ define i64 @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 {
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-NEXT:    xxswapd vs0, v2
-; CHECK-BE-NEXT:    xxlor vs1, v2, v2
-; CHECK-BE-NEXT:    xscvsxdsp f1, f1
+; CHECK-BE-NEXT:    xscvsxdsp f1, v2
 ; CHECK-BE-NEXT:    xscvsxdsp f0, f0
 ; CHECK-BE-NEXT:    xscvdpspn v2, f1
 ; CHECK-BE-NEXT:    xscvdpspn v3, f0

diff  --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index 23532e2dd772b..6a6ef7f4c978c 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -1366,8 +1366,7 @@ define <2 x float> @test44(<2 x i64> %a) {
 ; CHECK-LE-LABEL: test44:
 ; CHECK-LE:       # %bb.0:
 ; CHECK-LE-NEXT:    xxswapd vs0, v2
-; CHECK-LE-NEXT:    xxlor vs1, v2, v2
-; CHECK-LE-NEXT:    xscvuxdsp f1, f1
+; CHECK-LE-NEXT:    xscvuxdsp f1, v2
 ; CHECK-LE-NEXT:    xscvuxdsp f0, f0
 ; CHECK-LE-NEXT:    xscvdpspn v3, f1
 ; CHECK-LE-NEXT:    xscvdpspn v2, f0
@@ -1446,8 +1445,7 @@ define <2 x float> @test45(<2 x i64> %a) {
 ; CHECK-LE-LABEL: test45:
 ; CHECK-LE:       # %bb.0:
 ; CHECK-LE-NEXT:    xxswapd vs0, v2
-; CHECK-LE-NEXT:    xxlor vs1, v2, v2
-; CHECK-LE-NEXT:    xscvsxdsp f1, f1
+; CHECK-LE-NEXT:    xscvsxdsp f1, v2
 ; CHECK-LE-NEXT:    xscvsxdsp f0, f0
 ; CHECK-LE-NEXT:    xscvdpspn v3, f1
 ; CHECK-LE-NEXT:    xscvdpspn v2, f0


        


More information about the llvm-commits mailing list