[llvm] r364886 - [PowerPC] Implement the areMemAccessesTriviallyDisjoint hook

QingShan Zhang via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 1 20:28:53 PDT 2019


Author: qshanz
Date: Mon Jul  1 20:28:52 2019
New Revision: 364886

URL: http://llvm.org/viewvc/llvm-project?rev=364886&view=rev
Log:
[PowerPC] Implement the areMemAccessesTriviallyDisjoint hook
With this hook implemented, the memory dependencies in the scheduling dependency graph are modeled more precisely,
which gives the scheduler more opportunity to reorder loads/stores, since under some conditions they have no dependency on each other.

Differential Revision: https://reviews.llvm.org/D63804

Added:
    llvm/trunk/test/CodeGen/PowerPC/scheduling-mem-dependency.ll
Modified:
    llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
    llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h
    llvm/trunk/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
    llvm/trunk/test/CodeGen/PowerPC/extract-and-store.ll
    llvm/trunk/test/CodeGen/PowerPC/f128-aggregates.ll
    llvm/trunk/test/CodeGen/PowerPC/legalize-vaarg.ll
    llvm/trunk/test/CodeGen/PowerPC/ppc32-skip-regs.ll
    llvm/trunk/test/CodeGen/PowerPC/varargs.ll
    llvm/trunk/test/CodeGen/PowerPC/vec-min-max.ll
    llvm/trunk/test/CodeGen/PowerPC/vsx.ll

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp?rev=364886&r1=364885&r2=364886&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp Mon Jul  1 20:28:52 2019
@@ -3996,3 +3996,59 @@ unsigned PPCInstrInfo::reduceLoopCount(
   return LoopCountReg;
 }
 
+// Extract the base register operand, immediate byte offset and memory width of
+// LdSt. Returns true on success; the out-parameters are only set in that case.
+bool PPCInstrInfo::getMemOperandWithOffsetWidth(
+  const MachineInstr &LdSt,
+  const MachineOperand *&BaseReg,
+  int64_t &Offset,
+  unsigned &Width,
+  const TargetRegisterInfo *TRI) const {
+  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+
+  // Only the three-operand form is understood: an immediate offset at operand
+  // 1, a base register at operand 2, and exactly one memory operand attached.
+  if (LdSt.getNumExplicitOperands() != 3)
+    return false;
+  const MachineOperand &OffsetOp = LdSt.getOperand(1);
+  const MachineOperand &BaseOp = LdSt.getOperand(2);
+  if (!(OffsetOp.isImm() && BaseOp.isReg()) || !LdSt.hasOneMemOperand())
+    return false;
+
+  Offset = OffsetOp.getImm();
+  BaseReg = &BaseOp;
+  Width = (*LdSt.memoperands_begin())->getSize();
+  return true;
+}
+
+// Return true only if MIa and MIb provably access non-overlapping memory.
+bool PPCInstrInfo::areMemAccessesTriviallyDisjoint(
+    const MachineInstr &MIa, const MachineInstr &MIb,
+    AliasAnalysis * /*AA*/) const {
+  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
+  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
+
+  // Be conservative: never claim disjointness for ordered/side-effecting ops.
+  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
+      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
+    return false;
+
+  // Retrieve the base register, byte offset and access width of each
+  // instruction. When both use the same base register, the accesses are
+  // disjoint if the lower one ends at or before the higher one begins.
+  // Offsets and widths stay 64-bit here: narrowing the int64_t offsets to int
+  // could wrap and make overlapping accesses look disjoint.
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
+  int64_t OffsetA = 0, OffsetB = 0;
+  unsigned int WidthA = 0, WidthB = 0;
+  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
+      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI) &&
+      BaseOpA->isIdenticalTo(*BaseOpB)) {
+    const int64_t LowOffset = std::min(OffsetA, OffsetB);
+    const int64_t HighOffset = std::max(OffsetA, OffsetB);
+    const int64_t LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+    return LowOffset + LowWidth <= HighOffset;
+  }
+  return false;
+}

Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h?rev=364886&r1=364885&r2=364886&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.h Mon Jul  1 20:28:52 2019
@@ -357,6 +357,22 @@ public:
                             unsigned SrcReg2, int Mask, int Value,
                             const MachineRegisterInfo *MRI) const override;
 
+
+  /// Return true if the base operand, byte offset and memory width of \p LdSt
+  /// could be determined. Width is the size of the memory that is being
+  /// loaded/stored (e.g. 1, 2, 4, 8).
+  bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
+                                    const MachineOperand *&BaseOp,
+                                    int64_t &Offset, unsigned &Width,
+                                    const TargetRegisterInfo *TRI) const;
+
+  /// Return true if the two MIs are known to access non-overlapping memory,
+  /// and false otherwise.
+  bool
+  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
+                                  const MachineInstr &MIb,
+                                  AliasAnalysis *AA = nullptr) const override;
+
   /// GetInstSize - Return the number of bytes of code the specified
   /// instruction may be.  This returns the maximum number of bytes.
   ///

Modified: llvm/trunk/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll?rev=364886&r1=364885&r2=364886&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll Mon Jul  1 20:28:52 2019
@@ -12,26 +12,26 @@ define i64 @__fixunstfdi(ppc_fp128 %a) n
 ; CHECK-NEXT:    stw 30, 416(1) # 4-byte Folded Spill
 ; CHECK-NEXT:    lis 3, .LCPI0_0@ha
 ; CHECK-NEXT:    stw 12, 408(1)
+; CHECK-NEXT:    stfd 2, 376(1)
 ; CHECK-NEXT:    stfd 27, 424(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 1, 384(1)
 ; CHECK-NEXT:    stfd 28, 432(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd 29, 440(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd 30, 448(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd 31, 456(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 2, 376(1)
-; CHECK-NEXT:    lfs 27, .LCPI0_0@l(3)
-; CHECK-NEXT:    stfd 1, 384(1)
 ; CHECK-NEXT:    lwz 4, 380(1)
+; CHECK-NEXT:    lfs 27, .LCPI0_0@l(3)
+; CHECK-NEXT:    lwz 3, 384(1)
+; CHECK-NEXT:    stw 4, 396(1)
 ; CHECK-NEXT:    fcmpu 0, 2, 27
+; CHECK-NEXT:    lwz 4, 376(1)
 ; CHECK-NEXT:    fcmpu 1, 1, 27
 ; CHECK-NEXT:    crand 20, 6, 0
-; CHECK-NEXT:    stw 4, 396(1)
 ; CHECK-NEXT:    cror 20, 4, 20
-; CHECK-NEXT:    lwz 4, 376(1)
 ; CHECK-NEXT:    stw 4, 392(1)
+; CHECK-NEXT:    stw 3, 400(1)
 ; CHECK-NEXT:    lwz 4, 388(1)
 ; CHECK-NEXT:    stw 4, 404(1)
-; CHECK-NEXT:    lwz 3, 384(1)
-; CHECK-NEXT:    stw 3, 400(1)
 ; CHECK-NEXT:    bc 4, 20, .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %bb5
 ; CHECK-NEXT:    li 3, 0
@@ -39,50 +39,50 @@ define i64 @__fixunstfdi(ppc_fp128 %a) n
 ; CHECK-NEXT:    b .LBB0_16
 ; CHECK-NEXT:  .LBB0_2: # %bb1
 ; CHECK-NEXT:    lfd 0, 400(1)
+; CHECK-NEXT:    lis 3, 15856
+; CHECK-NEXT:    stw 3, 336(1)
 ; CHECK-NEXT:    lfd 1, 392(1)
 ; CHECK-NEXT:    li 29, 0
-; CHECK-NEXT:    lis 3, 15856
 ; CHECK-NEXT:    stfd 0, 304(1)
-; CHECK-NEXT:    stfd 1, 296(1)
 ; CHECK-NEXT:    stw 29, 340(1)
-; CHECK-NEXT:    stw 3, 336(1)
 ; CHECK-NEXT:    stw 29, 332(1)
 ; CHECK-NEXT:    stw 29, 328(1)
 ; CHECK-NEXT:    lwz 3, 308(1)
+; CHECK-NEXT:    stfd 1, 296(1)
+; CHECK-NEXT:    lfd 3, 336(1)
+; CHECK-NEXT:    lfd 4, 328(1)
 ; CHECK-NEXT:    stw 3, 324(1)
 ; CHECK-NEXT:    lwz 3, 304(1)
 ; CHECK-NEXT:    stw 3, 320(1)
 ; CHECK-NEXT:    lwz 3, 300(1)
+; CHECK-NEXT:    lfd 31, 320(1)
 ; CHECK-NEXT:    stw 3, 316(1)
+; CHECK-NEXT:    fmr 1, 31
 ; CHECK-NEXT:    lwz 3, 296(1)
 ; CHECK-NEXT:    stw 3, 312(1)
-; CHECK-NEXT:    lfd 31, 320(1)
 ; CHECK-NEXT:    lfd 30, 312(1)
-; CHECK-NEXT:    lfd 3, 336(1)
-; CHECK-NEXT:    lfd 4, 328(1)
-; CHECK-NEXT:    fmr 1, 31
 ; CHECK-NEXT:    fmr 2, 30
 ; CHECK-NEXT:    bl __gcc_qmul@PLT
 ; CHECK-NEXT:    lis 3, 16864
 ; CHECK-NEXT:    stfd 1, 280(1)
-; CHECK-NEXT:    fmr 29, 1
+; CHECK-NEXT:    stw 3, 368(1)
 ; CHECK-NEXT:    stfd 2, 288(1)
-; CHECK-NEXT:    fmr 28, 2
 ; CHECK-NEXT:    stw 29, 372(1)
-; CHECK-NEXT:    stw 3, 368(1)
 ; CHECK-NEXT:    stw 29, 364(1)
 ; CHECK-NEXT:    stw 29, 360(1)
+; CHECK-NEXT:    fmr 29, 1
 ; CHECK-NEXT:    lwz 3, 284(1)
+; CHECK-NEXT:    fmr 28, 2
+; CHECK-NEXT:    lfd 3, 368(1)
+; CHECK-NEXT:    lfd 4, 360(1)
 ; CHECK-NEXT:    stw 3, 356(1)
 ; CHECK-NEXT:    lwz 3, 280(1)
 ; CHECK-NEXT:    stw 3, 352(1)
 ; CHECK-NEXT:    lwz 3, 292(1)
+; CHECK-NEXT:    lfd 1, 352(1)
 ; CHECK-NEXT:    stw 3, 348(1)
 ; CHECK-NEXT:    lwz 3, 288(1)
 ; CHECK-NEXT:    stw 3, 344(1)
-; CHECK-NEXT:    lfd 3, 368(1)
-; CHECK-NEXT:    lfd 4, 360(1)
-; CHECK-NEXT:    lfd 1, 352(1)
 ; CHECK-NEXT:    lfd 2, 344(1)
 ; CHECK-NEXT:    bl __gcc_qsub@PLT
 ; CHECK-NEXT:    mffs 0
@@ -102,8 +102,8 @@ define i64 @__fixunstfdi(ppc_fp128 %a) n
 ; CHECK-NEXT:    lfs 1, .LCPI0_1@l(3)
 ; CHECK-NEXT:    fctiwz 0, 0
 ; CHECK-NEXT:    stfd 0, 152(1)
-; CHECK-NEXT:    fcmpu 1, 29, 1
 ; CHECK-NEXT:    lwz 3, 164(1)
+; CHECK-NEXT:    fcmpu 1, 29, 1
 ; CHECK-NEXT:    lwz 4, 156(1)
 ; CHECK-NEXT:    crandc 20, 6, 0
 ; CHECK-NEXT:    cror 20, 5, 20
@@ -120,25 +120,25 @@ define i64 @__fixunstfdi(ppc_fp128 %a) n
 ; CHECK-NEXT:    bl __floatditf@PLT
 ; CHECK-NEXT:    lis 3, 17392
 ; CHECK-NEXT:    stfd 1, 208(1)
-; CHECK-NEXT:    fmr 29, 1
+; CHECK-NEXT:    stw 3, 240(1)
 ; CHECK-NEXT:    stfd 2, 200(1)
-; CHECK-NEXT:    fmr 28, 2
 ; CHECK-NEXT:    stw 29, 244(1)
-; CHECK-NEXT:    stw 3, 240(1)
-; CHECK-NEXT:    cmpwi 2, 30, 0
 ; CHECK-NEXT:    stw 29, 236(1)
 ; CHECK-NEXT:    stw 29, 232(1)
+; CHECK-NEXT:    fmr 29, 1
 ; CHECK-NEXT:    lwz 3, 212(1)
+; CHECK-NEXT:    fmr 28, 2
+; CHECK-NEXT:    lfd 3, 240(1)
+; CHECK-NEXT:    lfd 4, 232(1)
+; CHECK-NEXT:    cmpwi 2, 30, 0
 ; CHECK-NEXT:    stw 3, 228(1)
 ; CHECK-NEXT:    lwz 3, 208(1)
 ; CHECK-NEXT:    stw 3, 224(1)
 ; CHECK-NEXT:    lwz 3, 204(1)
+; CHECK-NEXT:    lfd 1, 224(1)
 ; CHECK-NEXT:    stw 3, 220(1)
 ; CHECK-NEXT:    lwz 3, 200(1)
 ; CHECK-NEXT:    stw 3, 216(1)
-; CHECK-NEXT:    lfd 3, 240(1)
-; CHECK-NEXT:    lfd 4, 232(1)
-; CHECK-NEXT:    lfd 1, 224(1)
 ; CHECK-NEXT:    lfd 2, 216(1)
 ; CHECK-NEXT:    bl __gcc_qadd@PLT
 ; CHECK-NEXT:    blt 2, .LBB0_7
@@ -150,60 +150,60 @@ define i64 @__fixunstfdi(ppc_fp128 %a) n
 ; CHECK-NEXT:    fmr 1, 29
 ; CHECK-NEXT:  .LBB0_9: # %bb1
 ; CHECK-NEXT:    stfd 1, 184(1)
-; CHECK-NEXT:    fmr 1, 31
 ; CHECK-NEXT:    stfd 2, 192(1)
-; CHECK-NEXT:    fmr 2, 30
+; CHECK-NEXT:    fmr 1, 31
 ; CHECK-NEXT:    lwz 3, 188(1)
+; CHECK-NEXT:    fmr 2, 30
 ; CHECK-NEXT:    stw 3, 260(1)
 ; CHECK-NEXT:    lwz 3, 184(1)
 ; CHECK-NEXT:    stw 3, 256(1)
 ; CHECK-NEXT:    lwz 3, 196(1)
+; CHECK-NEXT:    lfd 3, 256(1)
 ; CHECK-NEXT:    stw 3, 252(1)
 ; CHECK-NEXT:    lwz 3, 192(1)
 ; CHECK-NEXT:    stw 3, 248(1)
-; CHECK-NEXT:    lfd 3, 256(1)
 ; CHECK-NEXT:    lfd 4, 248(1)
 ; CHECK-NEXT:    bl __gcc_qsub@PLT
 ; CHECK-NEXT:    stfd 2, 176(1)
-; CHECK-NEXT:    fcmpu 0, 2, 27
 ; CHECK-NEXT:    stfd 1, 168(1)
-; CHECK-NEXT:    fcmpu 1, 1, 27
+; CHECK-NEXT:    fcmpu 0, 2, 27
 ; CHECK-NEXT:    lwz 3, 180(1)
+; CHECK-NEXT:    fcmpu 1, 1, 27
 ; CHECK-NEXT:    crandc 20, 6, 0
 ; CHECK-NEXT:    cror 21, 5, 7
-; CHECK-NEXT:    cror 20, 21, 20
 ; CHECK-NEXT:    stw 3, 268(1)
+; CHECK-NEXT:    cror 20, 21, 20
 ; CHECK-NEXT:    lwz 3, 176(1)
 ; CHECK-NEXT:    stw 3, 264(1)
 ; CHECK-NEXT:    lwz 3, 172(1)
+; CHECK-NEXT:    lfd 30, 264(1)
 ; CHECK-NEXT:    stw 3, 276(1)
 ; CHECK-NEXT:    lwz 3, 168(1)
 ; CHECK-NEXT:    stw 3, 272(1)
-; CHECK-NEXT:    lfd 30, 264(1)
 ; CHECK-NEXT:    lfd 31, 272(1)
 ; CHECK-NEXT:    bc 12, 20, .LBB0_13
 ; CHECK-NEXT:  # %bb.10: # %bb2
-; CHECK-NEXT:    fneg 29, 30
 ; CHECK-NEXT:    fneg 28, 31
-; CHECK-NEXT:    li 29, 0
-; CHECK-NEXT:    lis 3, 16864
 ; CHECK-NEXT:    stfd 28, 48(1)
+; CHECK-NEXT:    lis 3, 16864
+; CHECK-NEXT:    stw 3, 80(1)
+; CHECK-NEXT:    fneg 29, 30
+; CHECK-NEXT:    lwz 3, 52(1)
 ; CHECK-NEXT:    stfd 29, 40(1)
+; CHECK-NEXT:    li 29, 0
 ; CHECK-NEXT:    stw 29, 84(1)
-; CHECK-NEXT:    stw 3, 80(1)
 ; CHECK-NEXT:    stw 29, 76(1)
 ; CHECK-NEXT:    stw 29, 72(1)
-; CHECK-NEXT:    lwz 3, 52(1)
 ; CHECK-NEXT:    stw 3, 68(1)
+; CHECK-NEXT:    lfd 3, 80(1)
+; CHECK-NEXT:    lfd 4, 72(1)
 ; CHECK-NEXT:    lwz 3, 48(1)
 ; CHECK-NEXT:    stw 3, 64(1)
 ; CHECK-NEXT:    lwz 3, 44(1)
+; CHECK-NEXT:    lfd 1, 64(1)
 ; CHECK-NEXT:    stw 3, 60(1)
 ; CHECK-NEXT:    lwz 3, 40(1)
 ; CHECK-NEXT:    stw 3, 56(1)
-; CHECK-NEXT:    lfd 3, 80(1)
-; CHECK-NEXT:    lfd 4, 72(1)
-; CHECK-NEXT:    lfd 1, 64(1)
 ; CHECK-NEXT:    lfd 2, 56(1)
 ; CHECK-NEXT:    bl __gcc_qsub@PLT
 ; CHECK-NEXT:    mffs 0
@@ -225,8 +225,8 @@ define i64 @__fixunstfdi(ppc_fp128 %a) n
 ; CHECK-NEXT:    fcmpu 0, 30, 0
 ; CHECK-NEXT:    fctiwz 2, 2
 ; CHECK-NEXT:    stfd 2, 24(1)
-; CHECK-NEXT:    fcmpu 1, 31, 1
 ; CHECK-NEXT:    lwz 3, 36(1)
+; CHECK-NEXT:    fcmpu 1, 31, 1
 ; CHECK-NEXT:    lwz 4, 28(1)
 ; CHECK-NEXT:    crandc 20, 6, 1
 ; CHECK-NEXT:    cror 20, 4, 20
@@ -240,25 +240,25 @@ define i64 @__fixunstfdi(ppc_fp128 %a) n
 ; CHECK-NEXT:    subfe 3, 29, 30
 ; CHECK-NEXT:    b .LBB0_16
 ; CHECK-NEXT:  .LBB0_13: # %bb3
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    lis 4, 16864
 ; CHECK-NEXT:    stfd 31, 112(1)
-; CHECK-NEXT:    stfd 30, 104(1)
+; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    stw 3, 148(1)
-; CHECK-NEXT:    stw 4, 144(1)
 ; CHECK-NEXT:    stw 3, 140(1)
 ; CHECK-NEXT:    stw 3, 136(1)
+; CHECK-NEXT:    stfd 30, 104(1)
+; CHECK-NEXT:    lis 4, 16864
 ; CHECK-NEXT:    lwz 3, 116(1)
+; CHECK-NEXT:    stw 4, 144(1)
+; CHECK-NEXT:    lfd 4, 136(1)
 ; CHECK-NEXT:    stw 3, 132(1)
+; CHECK-NEXT:    lfd 3, 144(1)
 ; CHECK-NEXT:    lwz 3, 112(1)
 ; CHECK-NEXT:    stw 3, 128(1)
 ; CHECK-NEXT:    lwz 3, 108(1)
+; CHECK-NEXT:    lfd 1, 128(1)
 ; CHECK-NEXT:    stw 3, 124(1)
 ; CHECK-NEXT:    lwz 3, 104(1)
 ; CHECK-NEXT:    stw 3, 120(1)
-; CHECK-NEXT:    lfd 3, 144(1)
-; CHECK-NEXT:    lfd 4, 136(1)
-; CHECK-NEXT:    lfd 1, 128(1)
 ; CHECK-NEXT:    lfd 2, 120(1)
 ; CHECK-NEXT:    bl __gcc_qsub@PLT
 ; CHECK-NEXT:    mffs 0
@@ -280,8 +280,8 @@ define i64 @__fixunstfdi(ppc_fp128 %a) n
 ; CHECK-NEXT:    fcmpu 0, 30, 0
 ; CHECK-NEXT:    fctiwz 2, 2
 ; CHECK-NEXT:    stfd 2, 88(1)
-; CHECK-NEXT:    fcmpu 1, 31, 1
 ; CHECK-NEXT:    lwz 3, 100(1)
+; CHECK-NEXT:    fcmpu 1, 31, 1
 ; CHECK-NEXT:    lwz 4, 92(1)
 ; CHECK-NEXT:    crandc 20, 6, 0
 ; CHECK-NEXT:    cror 20, 5, 20

Modified: llvm/trunk/test/CodeGen/PowerPC/extract-and-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/extract-and-store.ll?rev=364886&r1=364885&r2=364886&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/extract-and-store.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/extract-and-store.ll Mon Jul  1 20:28:52 2019
@@ -744,6 +744,7 @@ define void @test_13_consecutive_stores_
 ; CHECK-NEXT:    stb r4, 9(r5)
 ; CHECK-NEXT:    rldicl r4, r7, 32, 56
 ; CHECK-NEXT:    rldicl r6, r7, 8, 56
+; CHECK-NEXT:    stb r3, 12(r5)
 ; CHECK-NEXT:    stb r4, 0(r5)
 ; CHECK-NEXT:    rldicl r4, r7, 16, 56
 ; CHECK-NEXT:    stb r6, 3(r5)
@@ -756,7 +757,6 @@ define void @test_13_consecutive_stores_
 ; CHECK-NEXT:    rldicl r4, r7, 24, 56
 ; CHECK-NEXT:    stb r6, 10(r5)
 ; CHECK-NEXT:    stb r4, 11(r5)
-; CHECK-NEXT:    stb r3, 12(r5)
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-BE-LABEL: test_13_consecutive_stores_of_bytes:

Modified: llvm/trunk/test/CodeGen/PowerPC/f128-aggregates.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/f128-aggregates.ll?rev=364886&r1=364885&r2=364886&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/f128-aggregates.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/f128-aggregates.ll Mon Jul  1 20:28:52 2019
@@ -82,6 +82,7 @@ define fp128 @testStruct_03(%struct.With
                             align 16 %a) {
 ; CHECK-LABEL: testStruct_03:
 ; CHECK:       # %bb.0: # %entry
+; CHECK:        lxv v2, 128(r1)
 ; CHECK-DAG:    std r10, 88(r1)
 ; CHECK-DAG:    std r9, 80(r1)
 ; CHECK-DAG:    std r8, 72(r1)
@@ -90,11 +91,11 @@ define fp128 @testStruct_03(%struct.With
 ; CHECK-DAG:    std r5, 48(r1)
 ; CHECK-DAG:    std r4, 40(r1)
 ; CHECK-DAG:    std r3, 32(r1)
-; CHECK-NEXT:    lxv v2, 128(r1)
 ; CHECK-NEXT:    blr
 
 ; CHECK-BE-LABEL: testStruct_03:
 ; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE:        lxv v2, 144(r1)
 ; CHECK-BE-DAG:    std r10, 104(r1)
 ; CHECK-BE-DAG:    std r9, 96(r1)
 ; CHECK-BE-DAG:    std r8, 88(r1)
@@ -103,7 +104,6 @@ define fp128 @testStruct_03(%struct.With
 ; CHECK-BE-DAG:    std r5, 64(r1)
 ; CHECK-BE-DAG:    std r4, 56(r1)
 ; CHECK-BE-DAG:    std r3, 48(r1)
-; CHECK-BE-NEXT:    lxv v2, 144(r1)
 ; CHECK-BE-NEXT:    blr
 entry:
   %a7 = getelementptr inbounds %struct.With9fp128params,
@@ -256,28 +256,28 @@ entry:
 define fp128 @testNestedAggregate(%struct.MixedC* byval nocapture readonly align 16 %a) {
 ; CHECK-LABEL: testNestedAggregate:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-DAG:     std r10, 88(r1)
-; CHECK-DAG:     std r9, 80(r1)
 ; CHECK-DAG:     std r8, 72(r1)
 ; CHECK-DAG:     std r7, 64(r1)
+; CHECK:         lxv v2, 64(r1)
+; CHECK-DAG:     std r10, 88(r1)
+; CHECK-DAG:     std r9, 80(r1)
 ; CHECK-DAG:     std r6, 56(r1)
 ; CHECK-DAG:     std r5, 48(r1)
 ; CHECK-DAG:     std r4, 40(r1)
 ; CHECK-DAG:     std r3, 32(r1)
-; CHECK-NEXT:    lxv v2, 64(r1)
 ; CHECK-NEXT:    blr
 
 ; CHECK-BE-LABEL: testNestedAggregate:
 ; CHECK-BE:       # %bb.0: # %entry
 ; CHECK-BE-DAG:     std r8, 88(r1)
 ; CHECK-BE-DAG:     std r7, 80(r1)
+; CHECK-BE-NEXT:    lxv v2, 80(r1)
 ; CHECK-BE-DAG:     std r10, 104(r1)
 ; CHECK-BE-DAG:     std r9, 96(r1)
 ; CHECK-BE-DAG:     std r6, 72(r1)
 ; CHECK-BE-DAG:     std r5, 64(r1)
 ; CHECK-BE-DAG:     std r4, 56(r1)
 ; CHECK-BE-DAG:     std r3, 48(r1)
-; CHECK-BE-NEXT:    lxv v2, 80(r1)
 ; CHECK-BE-NEXT:    blr
 entry:
   %c = getelementptr inbounds %struct.MixedC, %struct.MixedC* %a, i64 0, i32 1, i32 1

Modified: llvm/trunk/test/CodeGen/PowerPC/legalize-vaarg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/legalize-vaarg.ll?rev=364886&r1=364885&r2=364886&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/legalize-vaarg.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/legalize-vaarg.ll Mon Jul  1 20:28:52 2019
@@ -5,6 +5,7 @@
 define <8 x i32> @test_large_vec_vaarg(i32 %n, ...) {
 ; BE-LABEL: test_large_vec_vaarg:
 ; BE:       # %bb.0:
+; BE-NEXT:    ld 3, -8(1)
 ; BE-NEXT:    std 4, 56(1)
 ; BE-NEXT:    std 5, 64(1)
 ; BE-NEXT:    std 6, 72(1)
@@ -12,7 +13,6 @@ define <8 x i32> @test_large_vec_vaarg(i
 ; BE-NEXT:    std 8, 88(1)
 ; BE-NEXT:    std 9, 96(1)
 ; BE-NEXT:    std 10, 104(1)
-; BE-NEXT:    ld 3, -8(1)
 ; BE-NEXT:    addi 3, 3, 15
 ; BE-NEXT:    rldicr 3, 3, 0, 59
 ; BE-NEXT:    addi 4, 3, 16
@@ -27,16 +27,16 @@ define <8 x i32> @test_large_vec_vaarg(i
 ;
 ; LE-LABEL: test_large_vec_vaarg:
 ; LE:       # %bb.0:
+; LE-NEXT:    ld 3, -8(1)
 ; LE-NEXT:    std 4, 40(1)
 ; LE-NEXT:    std 5, 48(1)
 ; LE-NEXT:    std 6, 56(1)
 ; LE-NEXT:    std 7, 64(1)
+; LE-NEXT:    addi 3, 3, 15
+; LE-NEXT:    rldicr 3, 3, 0, 59
 ; LE-NEXT:    std 8, 72(1)
 ; LE-NEXT:    std 9, 80(1)
 ; LE-NEXT:    std 10, 88(1)
-; LE-NEXT:    ld 3, -8(1)
-; LE-NEXT:    addi 3, 3, 15
-; LE-NEXT:    rldicr 3, 3, 0, 59
 ; LE-NEXT:    addi 4, 3, 31
 ; LE-NEXT:    addi 5, 3, 16
 ; LE-NEXT:    rldicr 4, 4, 0, 59

Modified: llvm/trunk/test/CodeGen/PowerPC/ppc32-skip-regs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ppc32-skip-regs.ll?rev=364886&r1=364885&r2=364886&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/ppc32-skip-regs.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/ppc32-skip-regs.ll Mon Jul  1 20:28:52 2019
@@ -16,10 +16,10 @@ entry:
 ; left for long double type (4 registers in soft float mode). Instead in r8 register this
 ; argument put on stack.
 ; CHECK-NOT: mr 8, 4
-; CHECK: stw 7, 20(1)
 ; CHECK: stw 6, 16(1)
 ; CHECK: stw 5, 12(1)
 ; CHECK: stw 4, 8(1)
+; CHECK: stw 7, 20(1)
 
 declare i32 @printf(i8* nocapture readonly, ...)
 

Added: llvm/trunk/test/CodeGen/PowerPC/scheduling-mem-dependency.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/scheduling-mem-dependency.ll?rev=364886&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/scheduling-mem-dependency.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/scheduling-mem-dependency.ll Mon Jul  1 20:28:52 2019
@@ -0,0 +1,19 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
+
+define i64 @store_disjoint_memory(i64* nocapture %P, i64 %v) {
+entry:
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: store_disjoint_memory:%bb.0
+; CHECK:SU(2):   STD renamable $x4, 24, renamable $x5 :: (store 8 into %ir.arrayidx)
+; CHECK-NOT: Successors:
+; CHECK-NOT:    SU(3): Ord  Latency=0 Memory
+; CHECK:SU(3):   STD renamable $x4, 16, renamable $x5 :: (store 8 into %ir.arrayidx1)
+; CHECK: Predecessors:
+; CHECK-NOT:    SU(2): Ord  Latency=0 Memory
+  %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 ; &P[3]
+  store i64 %v, i64* %arrayidx
+  %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2 ; &P[2], a different element than P[3]
+  store i64 %v, i64* %arrayidx1
+  ret i64 %v
+}

Modified: llvm/trunk/test/CodeGen/PowerPC/varargs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/varargs.ll?rev=364886&r1=364885&r2=364886&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/varargs.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/varargs.ll Mon Jul  1 20:28:52 2019
@@ -8,25 +8,29 @@ define i8* @test1(i8** %foo) nounwind {
 ; P32-LABEL: test1:
 ; P32:       # %bb.0:
 ; P32-NEXT:    lbz r4, 0(r3)
-; P32-NEXT:    addi r5, r4, 1
-; P32-NEXT:    stb r5, 0(r3)
-; P32-NEXT:    cmpwi r4, 8
 ; P32-NEXT:    lwz r5, 4(r3)
+; P32-NEXT:    lwz r6, 8(r3)
+; P32-NEXT:    addi r7, r4, 1
+; P32-NEXT:    stb r7, 0(r3)
+; P32-NEXT:    addi r7, r5, 4
+; P32-NEXT:    cmpwi r4, 8
 ; P32-NEXT:    slwi r4, r4, 2
-; P32-NEXT:    addi r6, r5, 4
-; P32-NEXT:    bc 12, lt, .LBB0_1
-; P32-NEXT:    b .LBB0_2
-; P32-NEXT:  .LBB0_1:
-; P32-NEXT:    addi r6, r5, 0
+; P32-NEXT:    add r4, r6, r4
+; P32-NEXT:    bc 12, lt, .LBB0_2
+; P32-NEXT:  # %bb.1:
+; P32-NEXT:    ori r6, r7, 0
+; P32-NEXT:    b .LBB0_3
 ; P32-NEXT:  .LBB0_2:
+; P32-NEXT:    addi r6, r5, 0
+; P32-NEXT:  .LBB0_3:
 ; P32-NEXT:    stw r6, 4(r3)
-; P32-NEXT:    lwz r3, 8(r3)
-; P32-NEXT:    add r3, r3, r4
-; P32-NEXT:    bc 12, lt, .LBB0_4
-; P32-NEXT:  # %bb.3:
+; P32-NEXT:    bc 12, lt, .LBB0_5
+; P32-NEXT:  # %bb.4:
 ; P32-NEXT:    ori r3, r5, 0
-; P32-NEXT:    b .LBB0_4
-; P32-NEXT:  .LBB0_4:
+; P32-NEXT:    b .LBB0_6
+; P32-NEXT:  .LBB0_5:
+; P32-NEXT:    addi r3, r4, 0
+; P32-NEXT:  .LBB0_6:
 ; P32-NEXT:    lwz r3, 0(r3)
 ; P32-NEXT:    blr
 ;

Modified: llvm/trunk/test/CodeGen/PowerPC/vec-min-max.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec-min-max.ll?rev=364886&r1=364885&r2=364886&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec-min-max.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec-min-max.ll Mon Jul  1 20:28:52 2019
@@ -67,13 +67,13 @@ define <2 x i64> @getsmaxi64(<2 x i64> %
 ; NOP8VEC-NEXT:    stxvd2x 1, 0, 4
 ; NOP8VEC-NEXT:    ld 3, -24(1)
 ; NOP8VEC-NEXT:    ld 4, -40(1)
+; NOP8VEC-NEXT:    ld 6, -48(1)
 ; NOP8VEC-NEXT:    cmpd 4, 3
 ; NOP8VEC-NEXT:    li 3, 0
 ; NOP8VEC-NEXT:    li 4, -1
 ; NOP8VEC-NEXT:    isel 5, 4, 3, 1
 ; NOP8VEC-NEXT:    std 5, -8(1)
 ; NOP8VEC-NEXT:    ld 5, -32(1)
-; NOP8VEC-NEXT:    ld 6, -48(1)
 ; NOP8VEC-NEXT:    cmpd 6, 5
 ; NOP8VEC-NEXT:    isel 3, 4, 3, 1
 ; NOP8VEC-NEXT:    std 3, -16(1)
@@ -184,13 +184,13 @@ define <2 x i64> @getsmini64(<2 x i64> %
 ; NOP8VEC-NEXT:    stxvd2x 1, 0, 4
 ; NOP8VEC-NEXT:    ld 3, -24(1)
 ; NOP8VEC-NEXT:    ld 4, -40(1)
+; NOP8VEC-NEXT:    ld 6, -48(1)
 ; NOP8VEC-NEXT:    cmpd 4, 3
 ; NOP8VEC-NEXT:    li 3, 0
 ; NOP8VEC-NEXT:    li 4, -1
 ; NOP8VEC-NEXT:    isel 5, 4, 3, 0
 ; NOP8VEC-NEXT:    std 5, -8(1)
 ; NOP8VEC-NEXT:    ld 5, -32(1)
-; NOP8VEC-NEXT:    ld 6, -48(1)
 ; NOP8VEC-NEXT:    cmpd 6, 5
 ; NOP8VEC-NEXT:    isel 3, 4, 3, 0
 ; NOP8VEC-NEXT:    std 3, -16(1)

Modified: llvm/trunk/test/CodeGen/PowerPC/vsx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsx.ll?rev=364886&r1=364885&r2=364886&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vsx.ll Mon Jul  1 20:28:52 2019
@@ -802,9 +802,9 @@ define <2 x i64> @test26(<2 x i64> %a, <
 ; CHECK-NEXT:    ld r3, -24(r1)
 ; CHECK-NEXT:    ld r4, -40(r1)
 ; CHECK-NEXT:    add r3, r4, r3
+; CHECK-NEXT:    ld r4, -48(r1)
 ; CHECK-NEXT:    std r3, -8(r1)
 ; CHECK-NEXT:    ld r3, -32(r1)
-; CHECK-NEXT:    ld r4, -48(r1)
 ; CHECK-NEXT:    add r3, r4, r3
 ; CHECK-NEXT:    std r3, -16(r1)
 ; CHECK-NEXT:    addi r3, r1, -16
@@ -820,9 +820,9 @@ define <2 x i64> @test26(<2 x i64> %a, <
 ; CHECK-REG-NEXT:    ld r3, -24(r1)
 ; CHECK-REG-NEXT:    ld r4, -40(r1)
 ; CHECK-REG-NEXT:    add r3, r4, r3
+; CHECK-REG-NEXT:    ld r4, -48(r1)
 ; CHECK-REG-NEXT:    std r3, -8(r1)
 ; CHECK-REG-NEXT:    ld r3, -32(r1)
-; CHECK-REG-NEXT:    ld r4, -48(r1)
 ; CHECK-REG-NEXT:    add r3, r4, r3
 ; CHECK-REG-NEXT:    std r3, -16(r1)
 ; CHECK-REG-NEXT:    addi r3, r1, -16
@@ -1832,9 +1832,9 @@ define <2 x i64> @test60(<2 x i64> %a, <
 ; CHECK-NEXT:    lwz r3, -20(r1)
 ; CHECK-NEXT:    ld r4, -40(r1)
 ; CHECK-NEXT:    sld r3, r4, r3
+; CHECK-NEXT:    ld r4, -48(r1)
 ; CHECK-NEXT:    std r3, -8(r1)
 ; CHECK-NEXT:    lwz r3, -28(r1)
-; CHECK-NEXT:    ld r4, -48(r1)
 ; CHECK-NEXT:    sld r3, r4, r3
 ; CHECK-NEXT:    std r3, -16(r1)
 ; CHECK-NEXT:    addi r3, r1, -16
@@ -1850,9 +1850,9 @@ define <2 x i64> @test60(<2 x i64> %a, <
 ; CHECK-REG-NEXT:    lwz r3, -20(r1)
 ; CHECK-REG-NEXT:    ld r4, -40(r1)
 ; CHECK-REG-NEXT:    sld r3, r4, r3
+; CHECK-REG-NEXT:    ld r4, -48(r1)
 ; CHECK-REG-NEXT:    std r3, -8(r1)
 ; CHECK-REG-NEXT:    lwz r3, -28(r1)
-; CHECK-REG-NEXT:    ld r4, -48(r1)
 ; CHECK-REG-NEXT:    sld r3, r4, r3
 ; CHECK-REG-NEXT:    std r3, -16(r1)
 ; CHECK-REG-NEXT:    addi r3, r1, -16
@@ -1898,9 +1898,9 @@ define <2 x i64> @test61(<2 x i64> %a, <
 ; CHECK-NEXT:    lwz r3, -20(r1)
 ; CHECK-NEXT:    ld r4, -40(r1)
 ; CHECK-NEXT:    srd r3, r4, r3
+; CHECK-NEXT:    ld r4, -48(r1)
 ; CHECK-NEXT:    std r3, -8(r1)
 ; CHECK-NEXT:    lwz r3, -28(r1)
-; CHECK-NEXT:    ld r4, -48(r1)
 ; CHECK-NEXT:    srd r3, r4, r3
 ; CHECK-NEXT:    std r3, -16(r1)
 ; CHECK-NEXT:    addi r3, r1, -16
@@ -1916,9 +1916,9 @@ define <2 x i64> @test61(<2 x i64> %a, <
 ; CHECK-REG-NEXT:    lwz r3, -20(r1)
 ; CHECK-REG-NEXT:    ld r4, -40(r1)
 ; CHECK-REG-NEXT:    srd r3, r4, r3
+; CHECK-REG-NEXT:    ld r4, -48(r1)
 ; CHECK-REG-NEXT:    std r3, -8(r1)
 ; CHECK-REG-NEXT:    lwz r3, -28(r1)
-; CHECK-REG-NEXT:    ld r4, -48(r1)
 ; CHECK-REG-NEXT:    srd r3, r4, r3
 ; CHECK-REG-NEXT:    std r3, -16(r1)
 ; CHECK-REG-NEXT:    addi r3, r1, -16
@@ -1964,9 +1964,9 @@ define <2 x i64> @test62(<2 x i64> %a, <
 ; CHECK-NEXT:    lwz r3, -20(r1)
 ; CHECK-NEXT:    ld r4, -40(r1)
 ; CHECK-NEXT:    srad r3, r4, r3
+; CHECK-NEXT:    ld r4, -48(r1)
 ; CHECK-NEXT:    std r3, -8(r1)
 ; CHECK-NEXT:    lwz r3, -28(r1)
-; CHECK-NEXT:    ld r4, -48(r1)
 ; CHECK-NEXT:    srad r3, r4, r3
 ; CHECK-NEXT:    std r3, -16(r1)
 ; CHECK-NEXT:    addi r3, r1, -16
@@ -1982,9 +1982,9 @@ define <2 x i64> @test62(<2 x i64> %a, <
 ; CHECK-REG-NEXT:    lwz r3, -20(r1)
 ; CHECK-REG-NEXT:    ld r4, -40(r1)
 ; CHECK-REG-NEXT:    srad r3, r4, r3
+; CHECK-REG-NEXT:    ld r4, -48(r1)
 ; CHECK-REG-NEXT:    std r3, -8(r1)
 ; CHECK-REG-NEXT:    lwz r3, -28(r1)
-; CHECK-REG-NEXT:    ld r4, -48(r1)
 ; CHECK-REG-NEXT:    srad r3, r4, r3
 ; CHECK-REG-NEXT:    std r3, -16(r1)
 ; CHECK-REG-NEXT:    addi r3, r1, -16
@@ -2148,13 +2148,13 @@ define <2 x i1> @test67(<2 x i64> %a, <2
 ; CHECK-NEXT:    stxvd2x v2, 0, r4
 ; CHECK-NEXT:    ld r3, -24(r1)
 ; CHECK-NEXT:    ld r4, -40(r1)
+; CHECK-NEXT:    ld r6, -48(r1)
 ; CHECK-NEXT:    cmpld r4, r3
 ; CHECK-NEXT:    li r3, 0
 ; CHECK-NEXT:    li r4, -1
 ; CHECK-NEXT:    isel r5, r4, r3, lt
 ; CHECK-NEXT:    std r5, -8(r1)
 ; CHECK-NEXT:    ld r5, -32(r1)
-; CHECK-NEXT:    ld r6, -48(r1)
 ; CHECK-NEXT:    cmpld r6, r5
 ; CHECK-NEXT:    isel r3, r4, r3, lt
 ; CHECK-NEXT:    std r3, -16(r1)
@@ -2170,13 +2170,13 @@ define <2 x i1> @test67(<2 x i64> %a, <2
 ; CHECK-REG-NEXT:    stxvd2x v2, 0, r4
 ; CHECK-REG-NEXT:    ld r3, -24(r1)
 ; CHECK-REG-NEXT:    ld r4, -40(r1)
+; CHECK-REG-NEXT:    ld r6, -48(r1)
 ; CHECK-REG-NEXT:    cmpld r4, r3
 ; CHECK-REG-NEXT:    li r3, 0
 ; CHECK-REG-NEXT:    li r4, -1
 ; CHECK-REG-NEXT:    isel r5, r4, r3, lt
 ; CHECK-REG-NEXT:    std r5, -8(r1)
 ; CHECK-REG-NEXT:    ld r5, -32(r1)
-; CHECK-REG-NEXT:    ld r6, -48(r1)
 ; CHECK-REG-NEXT:    cmpld r6, r5
 ; CHECK-REG-NEXT:    isel r3, r4, r3, lt
 ; CHECK-REG-NEXT:    std r3, -16(r1)




More information about the llvm-commits mailing list