[llvm-branch-commits] [llvm] 050c335 - [PowerPC] Replace MFVSRLD with MFVSRD when the vector is symmetrical

Stefan Pintilie via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Nov 26 12:32:25 PST 2021


Author: Stefan Pintilie
Date: 2021-11-26T14:27:46-06:00
New Revision: 050c335ab42f96acfdfa6572cad1310e685cd199

URL: https://github.com/llvm/llvm-project/commit/050c335ab42f96acfdfa6572cad1310e685cd199
DIFF: https://github.com/llvm/llvm-project/commit/050c335ab42f96acfdfa6572cad1310e685cd199.diff

LOG: [PowerPC] Replace MFVSRLD with MFVSRD when the vector is symmetrical

The MFVSRD instruction is faster than the MFVSRLD instruction. If the input
vector is symmetrical (its two doublewords are identical), both instructions
produce the same result, so we should prefer the faster one.

WIP.

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
    llvm/test/CodeGen/PowerPC/vector-reduce-add.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index d12a9b806fd03..ac1204f230656 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1064,6 +1064,49 @@ bool PPCMIPeephole::simplifyCode(void) {
         Simplified = true;
         break;
       }
+      case PPC::MFVSRLD:
+        // It is more efficient to use MFVSRD instead of MFVSRLD in cases where
+        // it is known that the two doublewords of the vector are identical.
+        MachineOperand &VecInput = MI.getOperand(1);
+        MachineInstr *DefVecReg = getVRegDefOrNull(&VecInput, MRI);
+        if (DefVecReg && DefVecReg->getOpcode() == PPC::VADDUDM) {
+          MachineOperand &AddInput1 = DefVecReg->getOperand(1);
+          MachineOperand &AddInput2 = DefVecReg->getOperand(2);
+          MachineInstr *DefAdd1 = getVRegDefOrNull(&AddInput1, MRI);
+          MachineInstr *DefAdd2 = getVRegDefOrNull(&AddInput2, MRI);
+
+          if (!DefAdd1 || !DefAdd2)
+            break;
+
+          // Check if one of the inputs to the vector add is a permute.
+          MachineInstr *PermInstr = nullptr;
+          Register NonPermReg;
+          if (DefAdd1->getOpcode() == PPC::XXPERMDI) {
+            PermInstr = DefAdd1;
+            NonPermReg = AddInput2.getReg();
+          } else if (DefAdd2->getOpcode() == PPC::XXPERMDI) {
+            PermInstr = DefAdd2;
+            NonPermReg = AddInput1.getReg();
+          }
+
+          // Abort if there is no permute instruction.
+          if (!PermInstr)
+            break;
+
+          MachineOperand &PermInput1 = PermInstr->getOperand(1);
+          MachineOperand &PermInput2 = PermInstr->getOperand(2);
+          unsigned PermType = PermInstr->getOperand(3).getImm();
+          if (PermInput1.getReg() == PermInput2.getReg() && PermType == 2 &&
+              PermInput1.getReg() == NonPermReg) {
+            Register VecInputReg = VecInput.getReg();
+            BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::MFVSRD),
+                    MI.getOperand(0).getReg())
+                .addReg(VecInputReg, 0, PPC::sub_64);
+            ToErase = &MI;
+            Simplified = true;
+          }
+        }
+        break;
       }
     }
 

diff  --git a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
index 2ba113f59da3c..b2ecce6777e05 100644
--- a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll
@@ -1245,7 +1245,7 @@ define dso_local i64 @v2i64(<2 x i64> %a) local_unnamed_addr #0 {
 ; PWR9LE:       # %bb.0: # %entry
 ; PWR9LE-NEXT:    xxswapd v3, v2
 ; PWR9LE-NEXT:    vaddudm v2, v2, v3
-; PWR9LE-NEXT:    mfvsrld r3, v2
+; PWR9LE-NEXT:    mfvsrd r3, v2
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v2i64:
@@ -1259,7 +1259,7 @@ define dso_local i64 @v2i64(<2 x i64> %a) local_unnamed_addr #0 {
 ; PWR10LE:       # %bb.0: # %entry
 ; PWR10LE-NEXT:    xxswapd v3, v2
 ; PWR10LE-NEXT:    vaddudm v2, v2, v3
-; PWR10LE-NEXT:    mfvsrld r3, v2
+; PWR10LE-NEXT:    mfvsrd r3, v2
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v2i64:
@@ -1279,7 +1279,7 @@ define dso_local i64 @v4i64(<4 x i64> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    vaddudm v2, v2, v3
 ; PWR9LE-NEXT:    xxswapd v3, v2
 ; PWR9LE-NEXT:    vaddudm v2, v2, v3
-; PWR9LE-NEXT:    mfvsrld r3, v2
+; PWR9LE-NEXT:    mfvsrd r3, v2
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v4i64:
@@ -1295,7 +1295,7 @@ define dso_local i64 @v4i64(<4 x i64> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    vaddudm v2, v2, v3
 ; PWR10LE-NEXT:    xxswapd v3, v2
 ; PWR10LE-NEXT:    vaddudm v2, v2, v3
-; PWR10LE-NEXT:    mfvsrld r3, v2
+; PWR10LE-NEXT:    mfvsrd r3, v2
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v4i64:
@@ -1318,7 +1318,7 @@ define dso_local i64 @v8i64(<8 x i64> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    vaddudm v2, v2, v3
 ; PWR9LE-NEXT:    xxswapd v3, v2
 ; PWR9LE-NEXT:    vaddudm v2, v2, v3
-; PWR9LE-NEXT:    mfvsrld r3, v2
+; PWR9LE-NEXT:    mfvsrd r3, v2
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v8i64:
@@ -1338,7 +1338,7 @@ define dso_local i64 @v8i64(<8 x i64> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    vaddudm v2, v2, v3
 ; PWR10LE-NEXT:    xxswapd v3, v2
 ; PWR10LE-NEXT:    vaddudm v2, v2, v3
-; PWR10LE-NEXT:    mfvsrld r3, v2
+; PWR10LE-NEXT:    mfvsrd r3, v2
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v8i64:
@@ -1367,7 +1367,7 @@ define dso_local i64 @v16i64(<16 x i64> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    vaddudm v2, v2, v3
 ; PWR9LE-NEXT:    xxswapd v3, v2
 ; PWR9LE-NEXT:    vaddudm v2, v2, v3
-; PWR9LE-NEXT:    mfvsrld r3, v2
+; PWR9LE-NEXT:    mfvsrd r3, v2
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v16i64:
@@ -1395,7 +1395,7 @@ define dso_local i64 @v16i64(<16 x i64> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    vaddudm v2, v2, v3
 ; PWR10LE-NEXT:    xxswapd v3, v2
 ; PWR10LE-NEXT:    vaddudm v2, v2, v3
-; PWR10LE-NEXT:    mfvsrld r3, v2
+; PWR10LE-NEXT:    mfvsrd r3, v2
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v16i64:
@@ -1468,7 +1468,7 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    vaddudm v2, v2, v3
 ; PWR9LE-NEXT:    xxswapd v3, v2
 ; PWR9LE-NEXT:    vaddudm v2, v2, v3
-; PWR9LE-NEXT:    mfvsrld r3, v2
+; PWR9LE-NEXT:    mfvsrd r3, v2
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v16i8tov16i64_sign:
@@ -1560,7 +1560,7 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    vaddudm v2, v2, v3
 ; PWR10LE-NEXT:    xxswapd v3, v2
 ; PWR10LE-NEXT:    vaddudm v2, v2, v3
-; PWR10LE-NEXT:    mfvsrld r3, v2
+; PWR10LE-NEXT:    mfvsrd r3, v2
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v16i8tov16i64_sign:
@@ -1667,7 +1667,7 @@ define dso_local i64 @v16i8tov16i64_zero(<16 x i8> %a) local_unnamed_addr #0 {
 ; PWR9LE-NEXT:    vaddudm v2, v2, v3
 ; PWR9LE-NEXT:    xxswapd v3, v2
 ; PWR9LE-NEXT:    vaddudm v2, v2, v3
-; PWR9LE-NEXT:    mfvsrld r3, v2
+; PWR9LE-NEXT:    mfvsrd r3, v2
 ; PWR9LE-NEXT:    blr
 ;
 ; PWR9BE-LABEL: v16i8tov16i64_zero:
@@ -1745,7 +1745,7 @@ define dso_local i64 @v16i8tov16i64_zero(<16 x i8> %a) local_unnamed_addr #0 {
 ; PWR10LE-NEXT:    vaddudm v2, v2, v3
 ; PWR10LE-NEXT:    xxswapd v3, v2
 ; PWR10LE-NEXT:    vaddudm v2, v2, v3
-; PWR10LE-NEXT:    mfvsrld r3, v2
+; PWR10LE-NEXT:    mfvsrd r3, v2
 ; PWR10LE-NEXT:    blr
 ;
 ; PWR10BE-LABEL: v16i8tov16i64_zero:


        


More information about the llvm-branch-commits mailing list