[llvm] eec9ca6 - [PowerPC] guard update form prepare with non-const increment with option

Chen Zheng via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 14 18:18:16 PST 2021


Author: Chen Zheng
Date: 2021-11-15T02:16:46Z
New Revision: eec9ca622c2df2bcf3ffa7fad5a2381b829758b7

URL: https://github.com/llvm/llvm-project/commit/eec9ca622c2df2bcf3ffa7fad5a2381b829758b7
DIFF: https://github.com/llvm/llvm-project/commit/eec9ca622c2df2bcf3ffa7fad5a2381b829758b7.diff

LOG: [PowerPC] Guard update-form preparation for non-constant increments behind an option

Reviewed By: jsji

Differential Revision: https://reviews.llvm.org/D113471

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
    llvm/test/CodeGen/PowerPC/common-chain.ll
    llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
    llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index 31b95cda5f61b..7f63827afbd66 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -125,6 +125,11 @@ static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update",
                                  cl::init(true), cl::Hidden,
   cl::desc("prefer update form when ds form is also a update form"));
 
+static cl::opt<bool> EnableUpdateFormForNonConstInc(
+    "ppc-formprep-update-nonconst-inc", cl::init(false), cl::Hidden,
+    cl::desc("prepare update form when the load/store increment is a loop "
+             "invariant non-const value."));
+
 static cl::opt<bool> EnableChainCommoning(
     "ppc-formprep-chain-commoning", cl::init(false), cl::Hidden,
     cl::desc("Enable chain commoning in PPC loop prepare pass."));
@@ -212,7 +217,7 @@ namespace {
   // load/store with update like ldu/stdu, or Prefetch intrinsic.
   // For DS form instructions, their displacements must be multiple of 4.
   // For DQ form instructions, their displacements must be multiple of 16.
-  enum InstrForm { UpdateForm = 1, DSForm = 4, DQForm = 16 };
+  enum PrepForm { UpdateForm = 1, DSForm = 4, DQForm = 16, ChainCommoning };
 
   class PPCLoopInstrFormPrep : public FunctionPass {
   public:
@@ -255,7 +260,7 @@ namespace {
     /// Check if required PHI node is already exist in Loop \p L.
     bool alreadyPrepared(Loop *L, Instruction *MemI,
                          const SCEV *BasePtrStartSCEV,
-                         const SCEV *BasePtrIncSCEV, InstrForm Form);
+                         const SCEV *BasePtrIncSCEV, PrepForm Form);
 
     /// Get the value which defines the increment SCEV \p BasePtrIncSCEV.
     Value *getNodeForInc(Loop *L, Instruction *MemI,
@@ -293,8 +298,7 @@ namespace {
 
     /// Prepare all candidates in \p Buckets for displacement form, now for
     /// ds/dq.
-    bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets,
-                      InstrForm Form);
+    bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets, PrepForm Form);
 
     /// Prepare for one chain \p BucketChain, find the best base element and
     /// update all other elements in \p BucketChain accordingly.
@@ -302,8 +306,7 @@ namespace {
     /// If success, best base element must be stored as the first element of
     /// \p BucketChain.
     /// Return false if no base element found, otherwise return true.
-    bool prepareBaseForDispFormChain(Bucket &BucketChain,
-                                     InstrForm Form);
+    bool prepareBaseForDispFormChain(Bucket &BucketChain, PrepForm Form);
 
     /// Prepare for one chain \p BucketChain, find the best base element and
     /// update all other elements in \p BucketChain accordingly.
@@ -316,12 +319,12 @@ namespace {
     /// preparation.
     bool rewriteLoadStores(Loop *L, Bucket &BucketChain,
                            SmallSet<BasicBlock *, 16> &BBChanged,
-                           InstrForm Form);
+                           PrepForm Form);
 
     /// Rewrite for the base load/store of a chain.
     std::pair<Instruction *, Instruction *>
     rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
-                   Instruction *BaseMemI, bool CanPreInc, InstrForm Form,
+                   Instruction *BaseMemI, bool CanPreInc, PrepForm Form,
                    SCEVExpander &SCEVE, SmallPtrSet<Value *, 16> &DeletedPtrs);
 
     /// Rewrite for the other load/stores of a chain according to the new \p
@@ -572,9 +575,9 @@ bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains(
     assert(BasePtrSCEV->isAffine() &&
            "Invalid SCEV type for the base ptr for a candidate chain!\n");
 
-    std::pair<Instruction *, Instruction *> Base =
-        rewriteForBase(L, BasePtrSCEV, Bucket.Elements[BaseElemIdx].Instr,
-                       false /* CanPreInc */, UpdateForm, SCEVE, DeletedPtrs);
+    std::pair<Instruction *, Instruction *> Base = rewriteForBase(
+        L, BasePtrSCEV, Bucket.Elements[BaseElemIdx].Instr,
+        false /* CanPreInc */, ChainCommoning, SCEVE, DeletedPtrs);
 
     if (!Base.first || !Base.second)
       return MadeChange;
@@ -645,7 +648,7 @@ bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains(
 std::pair<Instruction *, Instruction *>
 PPCLoopInstrFormPrep::rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
                                      Instruction *BaseMemI, bool CanPreInc,
-                                     InstrForm Form, SCEVExpander &SCEVE,
+                                     PrepForm Form, SCEVExpander &SCEVE,
                                      SmallPtrSet<Value *, 16> &DeletedPtrs) {
 
   LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
@@ -675,6 +678,13 @@ PPCLoopInstrFormPrep::rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
     return std::make_pair(nullptr, nullptr);
   }
 
+  if (Form == UpdateForm && !IsConstantInc && !EnableUpdateFormForNonConstInc) {
+    LLVM_DEBUG(
+        dbgs()
+        << "Update form prepare for non-const increment is not enabled!\n");
+    return std::make_pair(nullptr, nullptr);
+  }
+
   const SCEV *BasePtrStartSCEV = nullptr;
   if (CanPreInc) {
     assert(SE->isLoopInvariant(BasePtrIncSCEV, L) &&
@@ -884,7 +894,7 @@ SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
 }
 
 bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
-                                                    InstrForm Form) {
+                                                       PrepForm Form) {
   // RemainderOffsetInfo details:
   // key:            value of (Offset urem DispConstraint). For DSForm, it can
   //                 be [0, 4).
@@ -1001,7 +1011,7 @@ bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
 
 bool PPCLoopInstrFormPrep::rewriteLoadStores(
     Loop *L, Bucket &BucketChain, SmallSet<BasicBlock *, 16> &BBChanged,
-    InstrForm Form) {
+    PrepForm Form) {
   bool MadeChange = false;
 
   const SCEVAddRecExpr *BasePtrSCEV =
@@ -1098,8 +1108,9 @@ bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L,
   return MadeChange;
 }
 
-bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets,
-                                     InstrForm Form) {
+bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L,
+                                        SmallVector<Bucket, 16> &Buckets,
+                                        PrepForm Form) {
   bool MadeChange = false;
 
   if (Buckets.empty())
@@ -1202,7 +1213,7 @@ Value *PPCLoopInstrFormPrep::getNodeForInc(Loop *L, Instruction *MemI,
 bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI,
                                            const SCEV *BasePtrStartSCEV,
                                            const SCEV *BasePtrIncSCEV,
-                                           InstrForm Form) {
+                                           PrepForm Form) {
   BasicBlock *BB = MemI->getParent();
   if (!BB)
     return false;
@@ -1242,7 +1253,7 @@ bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI,
         if (PHIBasePtrIncSCEV == BasePtrIncSCEV) {
           // The existing PHI (CurrentPHINode) has the same start and increment
           // as the PHI that we wanted to create.
-          if (Form == UpdateForm &&
+          if ((Form == UpdateForm || Form == ChainCommoning ) &&
               PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) {
             ++PHINodeAlreadyExistsUpdate;
             return true;

diff  --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll
index 595b4836f4367..98b8d91a9be4b 100644
--- a/llvm/test/CodeGen/PowerPC/common-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/common-chain.ll
@@ -771,9 +771,9 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r31, -8(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r2, -152(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r9, -176(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r8, -168(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r7, -160(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r9, -160(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r8, -176(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    std r7, -168(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    blt cr0, .LBB7_7
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    sldi r6, r6, 2
@@ -789,66 +789,71 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
 ; CHECK-NEXT:    rldicl r7, r7, 62, 2
 ; CHECK-NEXT:    sldi r10, r12, 2
 ; CHECK-NEXT:    ld r2, -168(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r31, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    rldicl r7, r7, 2, 1
 ; CHECK-NEXT:    std r7, -184(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    ld r7, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    add r8, r7, r10
+; CHECK-NEXT:    mr r22, r7
 ; CHECK-NEXT:    mr r7, r4
-; CHECK-NEXT:    ld r4, -176(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    add r8, r4, r10
+; CHECK-NEXT:    mr r4, r3
+; CHECK-NEXT:    ld r3, -176(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    sldi r8, r8, 3
 ; CHECK-NEXT:    add r9, r5, r8
-; CHECK-NEXT:    add r8, r2, r10
-; CHECK-NEXT:    add r10, r31, r10
+; CHECK-NEXT:    add r8, r3, r10
+; CHECK-NEXT:    add r10, r2, r10
 ; CHECK-NEXT:    sldi r10, r10, 3
 ; CHECK-NEXT:    sldi r8, r8, 3
 ; CHECK-NEXT:    add r30, r5, r10
 ; CHECK-NEXT:    add r29, r7, r10
-; CHECK-NEXT:    add r28, r3, r10
+; CHECK-NEXT:    add r28, r4, r10
 ; CHECK-NEXT:    sldi r10, r12, 1
 ; CHECK-NEXT:    add r8, r5, r8
 ; CHECK-NEXT:    add r11, r12, r10
-; CHECK-NEXT:    add r0, r4, r11
+; CHECK-NEXT:    add r0, r22, r11
 ; CHECK-NEXT:    sldi r0, r0, 3
 ; CHECK-NEXT:    add r27, r5, r0
-; CHECK-NEXT:    add r0, r2, r11
-; CHECK-NEXT:    add r11, r31, r11
+; CHECK-NEXT:    add r0, r3, r11
+; CHECK-NEXT:    add r11, r2, r11
 ; CHECK-NEXT:    sldi r11, r11, 3
 ; CHECK-NEXT:    sldi r0, r0, 3
 ; CHECK-NEXT:    add r25, r5, r11
 ; CHECK-NEXT:    add r24, r7, r11
-; CHECK-NEXT:    add r23, r3, r11
-; CHECK-NEXT:    add r11, r4, r10
+; CHECK-NEXT:    add r23, r4, r11
+; CHECK-NEXT:    add r11, r22, r10
 ; CHECK-NEXT:    add r26, r5, r0
+; CHECK-NEXT:    mr r0, r22
 ; CHECK-NEXT:    sldi r11, r11, 3
 ; CHECK-NEXT:    add r22, r5, r11
-; CHECK-NEXT:    add r11, r2, r10
-; CHECK-NEXT:    add r10, r31, r10
+; CHECK-NEXT:    add r11, r3, r10
+; CHECK-NEXT:    add r10, r2, r10
 ; CHECK-NEXT:    sldi r10, r10, 3
 ; CHECK-NEXT:    sldi r11, r11, 3
 ; CHECK-NEXT:    add r20, r5, r10
 ; CHECK-NEXT:    add r19, r7, r10
-; CHECK-NEXT:    add r18, r3, r10
-; CHECK-NEXT:    add r10, r12, r4
+; CHECK-NEXT:    add r18, r4, r10
+; CHECK-NEXT:    add r10, r12, r0
 ; CHECK-NEXT:    add r21, r5, r11
 ; CHECK-NEXT:    sldi r11, r2, 3
 ; CHECK-NEXT:    sldi r10, r10, 3
 ; CHECK-NEXT:    add r17, r5, r10
-; CHECK-NEXT:    add r10, r12, r2
+; CHECK-NEXT:    add r10, r12, r3
 ; CHECK-NEXT:    sldi r10, r10, 3
 ; CHECK-NEXT:    add r16, r5, r10
-; CHECK-NEXT:    add r10, r12, r31
-; CHECK-NEXT:    sldi r31, r31, 3
-; CHECK-NEXT:    sub r0, r11, r31
-; CHECK-NEXT:    sldi r11, r4, 3
-; CHECK-NEXT:    mr r4, r7
-; CHECK-NEXT:    ld r7, -184(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    add r10, r12, r2
 ; CHECK-NEXT:    sldi r10, r10, 3
 ; CHECK-NEXT:    add r15, r5, r10
-; CHECK-NEXT:    add r14, r3, r10
-; CHECK-NEXT:    sub r31, r11, r31
-; CHECK-NEXT:    add r2, r4, r10
+; CHECK-NEXT:    add r14, r7, r10
+; CHECK-NEXT:    add r31, r4, r10
+; CHECK-NEXT:    sldi r10, r3, 3
+; CHECK-NEXT:    mr r3, r4
+; CHECK-NEXT:    mr r4, r7
+; CHECK-NEXT:    ld r7, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    sub r0, r10, r11
+; CHECK-NEXT:    sldi r10, r7, 3
+; CHECK-NEXT:    ld r7, -184(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    sub r2, r10, r11
 ; CHECK-NEXT:    li r11, 0
 ; CHECK-NEXT:    mr r10, r12
-; CHECK-NEXT:    rldicl r7, r7, 2, 1
 ; CHECK-NEXT:    addi r7, r7, -4
 ; CHECK-NEXT:    rldicl r7, r7, 62, 2
 ; CHECK-NEXT:    addi r7, r7, 1
@@ -857,8 +862,8 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB7_3: # %for.body
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lfd f0, 0(r14)
-; CHECK-NEXT:    lfd f1, 0(r2)
+; CHECK-NEXT:    lfd f0, 0(r31)
+; CHECK-NEXT:    lfd f1, 0(r14)
 ; CHECK-NEXT:    add r10, r10, r12
 ; CHECK-NEXT:    add r10, r10, r12
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
@@ -868,16 +873,16 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
 ; CHECK-NEXT:    xsadddp f0, f1, f0
 ; CHECK-NEXT:    stfd f0, 0(r15)
 ; CHECK-NEXT:    add r15, r15, r7
-; CHECK-NEXT:    lfdx f0, r14, r0
-; CHECK-NEXT:    lfdx f1, r2, r0
+; CHECK-NEXT:    lfdx f0, r31, r0
+; CHECK-NEXT:    lfdx f1, r14, r0
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
 ; CHECK-NEXT:    lfdx f1, r16, r11
 ; CHECK-NEXT:    xsadddp f0, f1, f0
 ; CHECK-NEXT:    stfdx f0, r16, r11
-; CHECK-NEXT:    lfdx f0, r14, r31
-; CHECK-NEXT:    lfdx f1, r2, r31
+; CHECK-NEXT:    lfdx f0, r31, r2
+; CHECK-NEXT:    lfdx f1, r14, r2
+; CHECK-NEXT:    add r31, r31, r7
 ; CHECK-NEXT:    add r14, r14, r7
-; CHECK-NEXT:    add r2, r2, r7
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
 ; CHECK-NEXT:    lfdx f1, r17, r11
 ; CHECK-NEXT:    xsadddp f0, f1, f0
@@ -894,8 +899,8 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
 ; CHECK-NEXT:    lfdx f1, r21, r11
 ; CHECK-NEXT:    xsadddp f0, f1, f0
 ; CHECK-NEXT:    stfdx f0, r21, r11
-; CHECK-NEXT:    lfdx f0, r18, r31
-; CHECK-NEXT:    lfdx f1, r19, r31
+; CHECK-NEXT:    lfdx f0, r18, r2
+; CHECK-NEXT:    lfdx f1, r19, r2
 ; CHECK-NEXT:    add r18, r18, r7
 ; CHECK-NEXT:    add r19, r19, r7
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
@@ -914,8 +919,8 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
 ; CHECK-NEXT:    lfdx f1, r26, r11
 ; CHECK-NEXT:    xsadddp f0, f1, f0
 ; CHECK-NEXT:    stfdx f0, r26, r11
-; CHECK-NEXT:    lfdx f0, r23, r31
-; CHECK-NEXT:    lfdx f1, r24, r31
+; CHECK-NEXT:    lfdx f0, r23, r2
+; CHECK-NEXT:    lfdx f1, r24, r2
 ; CHECK-NEXT:    add r23, r23, r7
 ; CHECK-NEXT:    add r24, r24, r7
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
@@ -934,8 +939,8 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
 ; CHECK-NEXT:    lfdx f1, r8, r11
 ; CHECK-NEXT:    xsadddp f0, f1, f0
 ; CHECK-NEXT:    stfdx f0, r8, r11
-; CHECK-NEXT:    lfdx f0, r28, r31
-; CHECK-NEXT:    lfdx f1, r29, r31
+; CHECK-NEXT:    lfdx f0, r28, r2
+; CHECK-NEXT:    lfdx f1, r29, r2
 ; CHECK-NEXT:    add r28, r28, r7
 ; CHECK-NEXT:    add r29, r29, r7
 ; CHECK-NEXT:    xsmuldp f0, f0, f1
@@ -948,46 +953,44 @@ define signext i32 @spill_reduce_succ(double* %input1, double* %input2, double*
 ; CHECK-NEXT:    cmpldi r6, 0
 ; CHECK-NEXT:    beq cr0, .LBB7_7
 ; CHECK-NEXT:  # %bb.5: # %for.body.epil.preheader
-; CHECK-NEXT:    ld r0, -168(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld r7, -176(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    sldi r8, r12, 3
-; CHECK-NEXT:    add r0, r10, r0
+; CHECK-NEXT:    ld r12, -176(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld r7, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    add r12, r10, r12
 ; CHECK-NEXT:    add r7, r10, r7
-; CHECK-NEXT:    sldi r0, r0, 3
+; CHECK-NEXT:    sldi r0, r12, 3
 ; CHECK-NEXT:    sldi r11, r7, 3
-; CHECK-NEXT:    add r30, r5, r0
-; CHECK-NEXT:    add r29, r4, r0
-; CHECK-NEXT:    add r28, r3, r0
-; CHECK-NEXT:    ld r0, -160(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    add r12, r5, r0
+; CHECK-NEXT:    add r30, r4, r0
+; CHECK-NEXT:    add r29, r3, r0
+; CHECK-NEXT:    ld r0, -168(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    add r7, r5, r11
 ; CHECK-NEXT:    add r9, r4, r11
 ; CHECK-NEXT:    add r11, r3, r11
 ; CHECK-NEXT:    add r10, r10, r0
-; CHECK-NEXT:    sub r12, r10, r12
 ; CHECK-NEXT:    sldi r10, r10, 3
-; CHECK-NEXT:    sldi r12, r12, 3
 ; CHECK-NEXT:    add r5, r5, r10
+; CHECK-NEXT:    add r4, r4, r10
+; CHECK-NEXT:    add r3, r3, r10
 ; CHECK-NEXT:    li r10, 0
-; CHECK-NEXT:    add r3, r3, r12
-; CHECK-NEXT:    add r4, r4, r12
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB7_6: # %for.body.epil
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lfdux f0, r4, r8
-; CHECK-NEXT:    lfdux f1, r3, r8
+; CHECK-NEXT:    lfdx f0, r3, r10
+; CHECK-NEXT:    lfdx f1, r4, r10
 ; CHECK-NEXT:    addi r6, r6, -1
 ; CHECK-NEXT:    cmpldi r6, 0
-; CHECK-NEXT:    xsmuldp f0, f1, f0
+; CHECK-NEXT:    xsmuldp f0, f0, f1
 ; CHECK-NEXT:    lfd f1, 0(r5)
 ; CHECK-NEXT:    xsadddp f0, f1, f0
 ; CHECK-NEXT:    stfd f0, 0(r5)
 ; CHECK-NEXT:    add r5, r5, r8
-; CHECK-NEXT:    lfdx f0, r28, r10
-; CHECK-NEXT:    lfdx f1, r29, r10
-; CHECK-NEXT:    xsmuldp f0, f0, f1
+; CHECK-NEXT:    lfdx f0, r29, r10
 ; CHECK-NEXT:    lfdx f1, r30, r10
+; CHECK-NEXT:    xsmuldp f0, f0, f1
+; CHECK-NEXT:    lfdx f1, r12, r10
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    stfdx f0, r30, r10
+; CHECK-NEXT:    stfdx f0, r12, r10
 ; CHECK-NEXT:    lfdx f0, r11, r10
 ; CHECK-NEXT:    lfdx f1, r9, r10
 ; CHECK-NEXT:    xsmuldp f0, f0, f1

diff  --git a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
index 3bc018286fea1..d1b9554abdf50 100644
--- a/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-instr-prep-non-const-increasement.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -disable-lsr -ppc-asm-full-reg-names -verify-machineinstrs \
-; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
+; RUN:   -ppc-formprep-update-nonconst-inc -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 < %s | FileCheck %s
 
 ; long long foo(char *p, int n, int count) {
 ;   int j = 0;

diff  --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
index b449dc8a46944..478e15ac33c89 100644
--- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
@@ -45,22 +45,22 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5
 ; CHECK-NEXT:  # %bb.4:
 ; CHECK-NEXT:    add 23, 6, 12
 ; CHECK-NEXT:    add 22, 6, 30
-; CHECK-NEXT:    add 26, 6, 28
-; CHECK-NEXT:    add 25, 6, 8
-; CHECK-NEXT:    sldi 24, 6, 3
-; CHECK-NEXT:    sldi 26, 26, 3
+; CHECK-NEXT:    add 25, 6, 28
+; CHECK-NEXT:    add 24, 6, 8
+; CHECK-NEXT:    sldi 26, 6, 3
 ; CHECK-NEXT:    sldi 25, 25, 3
+; CHECK-NEXT:    sldi 24, 24, 3
 ; CHECK-NEXT:    sldi 23, 23, 3
 ; CHECK-NEXT:    sldi 22, 22, 3
-; CHECK-NEXT:    add 24, 4, 24
-; CHECK-NEXT:    add 26, 29, 26
+; CHECK-NEXT:    add 26, 4, 26
 ; CHECK-NEXT:    add 25, 29, 25
+; CHECK-NEXT:    add 24, 29, 24
 ; CHECK-NEXT:    add 23, 3, 23
 ; CHECK-NEXT:    add 22, 3, 22
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB0_5: # Parent Loop BB0_3 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
-; CHECK-NEXT:    lfd 0, 0(24)
+; CHECK-NEXT:    lfd 0, 0(26)
 ; CHECK-NEXT:    lfd 1, 0(23)
 ; CHECK-NEXT:    add 6, 6, 10
 ; CHECK-NEXT:    cmpd 6, 27
@@ -81,6 +81,15 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5
 ; CHECK-NEXT:    lfd 1, 24(22)
 ; CHECK-NEXT:    add 22, 22, 11
 ; CHECK-NEXT:    xsadddp 0, 0, 1
+; CHECK-NEXT:    lfd 1, -16(24)
+; CHECK-NEXT:    xsadddp 0, 0, 1
+; CHECK-NEXT:    lfd 1, -8(24)
+; CHECK-NEXT:    xsadddp 0, 0, 1
+; CHECK-NEXT:    lfd 1, 0(24)
+; CHECK-NEXT:    xsadddp 0, 0, 1
+; CHECK-NEXT:    lfd 1, 8(24)
+; CHECK-NEXT:    add 24, 24, 11
+; CHECK-NEXT:    xsadddp 0, 0, 1
 ; CHECK-NEXT:    lfd 1, -16(25)
 ; CHECK-NEXT:    xsadddp 0, 0, 1
 ; CHECK-NEXT:    lfd 1, -8(25)
@@ -90,17 +99,8 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5
 ; CHECK-NEXT:    lfd 1, 8(25)
 ; CHECK-NEXT:    add 25, 25, 11
 ; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -16(26)
-; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, -8(26)
-; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 0(26)
-; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    lfd 1, 8(26)
+; CHECK-NEXT:    stfd 0, 0(26)
 ; CHECK-NEXT:    add 26, 26, 11
-; CHECK-NEXT:    xsadddp 0, 0, 1
-; CHECK-NEXT:    stfd 0, 0(24)
-; CHECK-NEXT:    add 24, 24, 11
 ; CHECK-NEXT:    blt 0, .LBB0_5
 ; CHECK-NEXT:    b .LBB0_2
 ; CHECK-NEXT:  .LBB0_6:


        


More information about the llvm-commits mailing list