[llvm] 5357dd2 - [MachinePipeliner] Fix Phi generation failure for large stages

KAWASHIMA Takahiro via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 8 21:14:57 PDT 2022


Author: Yuta Mukai
Date: 2022-08-09T13:14:26+09:00
New Revision: 5357dd2f43a5aab28f34b69e7d3742dd50cbad98

URL: https://github.com/llvm/llvm-project/commit/5357dd2f43a5aab28f34b69e7d3742dd50cbad98
DIFF: https://github.com/llvm/llvm-project/commit/5357dd2f43a5aab28f34b69e7d3742dd50cbad98.diff

LOG: [MachinePipeliner] Fix Phi generation failure for large stages

The previous code overwrites VRMap for prologue stages during Phi
generation if a register spans many stages.
As a result, the wrong register is used as the one coming from
the prologue in Phis at later stages. (A process exists to correct
this, but it does not work in all cases.)
In addition, VRMap for prologue must be preserved until addBranches().

This patch fixes them by separating the map for Phis into a different
variable (VRMapPhi).

Reviewed By: bcahoon

Differential Revision: https://reviews.llvm.org/D127840

Added: 
    llvm/test/CodeGen/PowerPC/sms-large-stages.mir

Modified: 
    llvm/include/llvm/CodeGen/ModuloSchedule.h
    llvm/lib/CodeGen/ModuloSchedule.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/ModuloSchedule.h b/llvm/include/llvm/CodeGen/ModuloSchedule.h
index c515101e80fdf..b9213ab58aa4b 100644
--- a/llvm/include/llvm/CodeGen/ModuloSchedule.h
+++ b/llvm/include/llvm/CodeGen/ModuloSchedule.h
@@ -192,7 +192,8 @@ class ModuloScheduleExpander {
                       ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
   void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB,
                       MachineBasicBlock *OrigBB, ValueMapTy *VRMap,
-                      MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
+                      ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
+                      MBBVectorTy &PrologBBs);
   void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
                             MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
                             ValueMapTy *VRMap, InstrMapTy &InstrMap,
@@ -200,8 +201,9 @@ class ModuloScheduleExpander {
                             bool IsLast);
   void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
                     MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
-                    ValueMapTy *VRMap, InstrMapTy &InstrMap,
-                    unsigned LastStageNum, unsigned CurStageNum, bool IsLast);
+                    ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
+                    InstrMapTy &InstrMap, unsigned LastStageNum,
+                    unsigned CurStageNum, bool IsLast);
   void removeDeadInstructions(MachineBasicBlock *KernelBB,
                               MBBVectorTy &EpilogBBs);
   void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs);

diff  --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index 581168b313849..d689e8e9d2f20 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -116,6 +116,12 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
   // a map between register names in the original block and the names created
   // in each stage of the pipelined loop.
   ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
+
+  // The renaming destination by Phis for the registers across stages.
+  // This map is updated during Phis generation to point to the most recent
+  // renaming destination.
+  ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
+
   InstrMapTy InstrMap;
 
   SmallVector<MachineBasicBlock *, 4> PrologBBs;
@@ -151,14 +157,15 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
 
   generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
                        InstrMap, MaxStageCount, MaxStageCount, false);
-  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap,
-               MaxStageCount, MaxStageCount, false);
+  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
+               InstrMap, MaxStageCount, MaxStageCount, false);
 
   LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
 
   SmallVector<MachineBasicBlock *, 4> EpilogBBs;
   // Generate the epilog instructions to complete the pipeline.
-  generateEpilog(MaxStageCount, KernelBB, BB, VRMap, EpilogBBs, PrologBBs);
+  generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
+                 PrologBBs);
 
   // We need this step because the register allocation doesn't handle some
   // situations well, so we insert copies to help out.
@@ -171,6 +178,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
   addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
 
   delete[] VRMap;
+  delete[] VRMapPhi;
 }
 
 void ModuloScheduleExpander::cleanup() {
@@ -242,7 +250,8 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
 /// block for each stage that needs to complete.
 void ModuloScheduleExpander::generateEpilog(
     unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
-    ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs) {
+    ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
+    MBBVectorTy &PrologBBs) {
   // We need to change the branch from the kernel to the first epilog block, so
   // this call to analyze branch uses the kernel rather than the original BB.
   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
@@ -296,8 +305,8 @@ void ModuloScheduleExpander::generateEpilog(
     }
     generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
                          InstrMap, LastStage, EpilogStage, i == 1);
-    generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap,
-                 LastStage, EpilogStage, i == 1);
+    generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
+                 InstrMap, LastStage, EpilogStage, i == 1);
     PredBB = NewBB;
 
     LLVM_DEBUG({
@@ -593,8 +602,9 @@ void ModuloScheduleExpander::generateExistingPhis(
 /// use in the pipelined sequence.
 void ModuloScheduleExpander::generatePhis(
     MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
-    MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
-    unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
+    MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
+    InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
+    bool IsLast) {
   // Compute the stage number that contains the initial Phi value, and
   // the Phi from the previous stage.
   unsigned PrologStage = 0;
@@ -631,26 +641,49 @@ void ModuloScheduleExpander::generatePhis(
       if (!InKernel && (unsigned)StageScheduled > PrologStage)
         continue;
 
-      unsigned PhiOp2 = VRMap[PrevStage][Def];
-      if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
-        if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
-          PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
+      unsigned PhiOp2;
+      if (InKernel) {
+        PhiOp2 = VRMap[PrevStage][Def];
+        if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
+          if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
+            PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
+      }
       // The number of Phis can't exceed the number of prolog stages. The
       // prolog stage number is zero based.
       if (NumPhis > PrologStage + 1 - StageScheduled)
         NumPhis = PrologStage + 1 - StageScheduled;
       for (unsigned np = 0; np < NumPhis; ++np) {
+        // Example for
+        // Org:
+        //   %Org = ... (Scheduled at Stage#0, NumPhi = 2)
+        //
+        // Prolog0 (Stage0):
+        //   %Clone0 = ...
+        // Prolog1 (Stage1):
+        //   %Clone1 = ...
+        // Kernel (Stage2):
+        //   %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
+        //   %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
+        //   %Clone2 = ...
+        // Epilog0 (Stage3):
+        //   %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
+        //   %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
+        // Epilog1 (Stage4):
+        //   %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
+        //
+        // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
+        // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
+        // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
+
         unsigned PhiOp1 = VRMap[PrologStage][Def];
         if (np <= PrologStage)
           PhiOp1 = VRMap[PrologStage - np][Def];
-        if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
-          if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
-            PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
-          if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
-            PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
+        if (!InKernel) {
+          if (PrevStage == LastStageNum && np == 0)
+            PhiOp2 = VRMap[LastStageNum][Def];
+          else
+            PhiOp2 = VRMapPhi[PrevStage - np][Def];
         }
-        if (!InKernel)
-          PhiOp2 = VRMap[PrevStage - np][Def];
 
         const TargetRegisterClass *RC = MRI.getRegClass(Def);
         Register NewReg = MRI.createVirtualRegister(RC);
@@ -672,9 +705,9 @@ void ModuloScheduleExpander::generatePhis(
                                 NewReg);
 
           PhiOp2 = NewReg;
-          VRMap[PrevStage - np - 1][Def] = NewReg;
+          VRMapPhi[PrevStage - np - 1][Def] = NewReg;
         } else {
-          VRMap[CurStageNum - np][Def] = NewReg;
+          VRMapPhi[CurStageNum - np][Def] = NewReg;
           if (np == NumPhis - 1)
             rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
                                   NewReg);

diff  --git a/llvm/test/CodeGen/PowerPC/sms-large-stages.mir b/llvm/test/CodeGen/PowerPC/sms-large-stages.mir
new file mode 100644
index 0000000000000..0ba5ca2bde2e8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/sms-large-stages.mir
@@ -0,0 +1,296 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 --ppc-enable-pipeliner -pipeliner-max-stages=10 -run-pass=pipeliner -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK
+
+--- |
+  define dso_local void @f(ptr noalias nocapture noundef writeonly %a, ptr nocapture noundef readonly %b, i32 noundef signext %n) local_unnamed_addr #0 {
+  entry:
+    %wide.trip.count = zext i32 %n to i64
+    %uglygep2 = getelementptr i8, ptr %b, i64 -4
+    %uglygep3 = getelementptr i8, ptr %a, i64 -4
+    call void @llvm.set.loop.iterations.i64(i64 %wide.trip.count)
+    br label %for.body
+
+  for.cond.cleanup:                                 ; preds = %for.body
+    ret void
+
+  for.body:                                         ; preds = %for.body, %entry
+    %0 = phi ptr [ %uglygep2, %entry ], [ %3, %for.body ]
+    %1 = phi ptr [ %uglygep3, %entry ], [ %2, %for.body ]
+    %2 = getelementptr i8, ptr %1, i64 4
+    %3 = getelementptr i8, ptr %0, i64 4
+    %4 = load float, ptr %3, align 4
+    %add = fadd float %4, %4
+    %add3 = fadd float %4, %add
+    store float %add3, ptr %2, align 4
+    %5 = call i1 @llvm.loop.decrement.i64(i64 1)
+    br i1 %5, label %for.body, label %for.cond.cleanup, !llvm.loop !0
+  }
+
+  ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn
+  declare void @llvm.set.loop.iterations.i64(i64) #1
+
+  ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn
+  declare i1 @llvm.loop.decrement.i64(i64) #1
+
+  attributes #0 = { argmemonly nofree norecurse nosync nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crbits,+crypto,+direct-move,+extdiv,+htm,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+power8-vector,+power9-vector,+quadword-atomics,+vsx,-privileged,-rop-protect,-spe" }
+  attributes #1 = { nocallback noduplicate nofree nosync nounwind willreturn }
+
+  !0 = distinct !{!0, !1, !2, !3}
+  !1 = !{!"llvm.loop.mustprogress"}
+  !2 = !{!"llvm.loop.unroll.disable"}
+  !3 = !{!"llvm.loop.pipeline.initiationinterval", i32 3}
+
+...
+---
+name:            f
+alignment:       16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: g8rc }
+  - { id: 1, class: g8rc }
+  - { id: 2, class: g8rc_and_g8rc_nox0 }
+  - { id: 3, class: g8rc_and_g8rc_nox0 }
+  - { id: 4, class: g8rc }
+  - { id: 5, class: g8rc }
+  - { id: 6, class: g8rc_and_g8rc_nox0 }
+  - { id: 7, class: g8rc_and_g8rc_nox0 }
+  - { id: 8, class: g8rc }
+  - { id: 9, class: g8rc }
+  - { id: 10, class: f4rc }
+  - { id: 11, class: g8rc_and_g8rc_nox0 }
+  - { id: 12, class: vssrc }
+  - { id: 13, class: f4rc }
+  - { id: 14, class: g8rc_and_g8rc_nox0 }
+liveins:
+  - { reg: '$x3', virtual-reg: '%6' }
+  - { reg: '$x4', virtual-reg: '%7' }
+  - { reg: '$x5', virtual-reg: '%8' }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: f
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT:   liveins: $x3, $x4, $x5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:g8rc = COPY $x5
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x4
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
+  ; CHECK-NEXT:   [[RLDICL:%[0-9]+]]:g8rc = RLDICL [[COPY]], 0, 32
+  ; CHECK-NEXT:   [[ADDI8_:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[COPY1]], -4
+  ; CHECK-NEXT:   [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 [[COPY2]], -4
+  ; CHECK-NEXT:   MTCTR8loop [[RLDICL]], implicit-def dead $ctr8
+  ; CHECK-NEXT:   B %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.for.cond.cleanup:
+  ; CHECK-NEXT:   BLR8 implicit $lr8, implicit $rm
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3.for.body:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.17(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[LFSU:%[0-9]+]]:f4rc, [[LFSU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[ADDI8_]] :: (load (s32) from %ir.3)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU1]]
+  ; CHECK-NEXT:   BDZ8 %bb.17, implicit-def $ctr8, implicit $ctr8
+  ; CHECK-NEXT:   B %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4.for.body:
+  ; CHECK-NEXT:   successors: %bb.5(0x40000000), %bb.16(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[LFSU2:%[0-9]+]]:f4rc, [[LFSU3:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY3]] :: (load unknown-size from %ir.3, align 4)
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU3]]
+  ; CHECK-NEXT:   BDZ8 %bb.16, implicit-def $ctr8, implicit $ctr8
+  ; CHECK-NEXT:   B %bb.5
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.5.for.body:
+  ; CHECK-NEXT:   successors: %bb.6(0x40000000), %bb.15(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[XSADDSP:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[LFSU]], [[LFSU]]
+  ; CHECK-NEXT:   [[LFSU4:%[0-9]+]]:f4rc, [[LFSU5:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY4]] :: (load unknown-size from %ir.3, align 4)
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU5]]
+  ; CHECK-NEXT:   BDZ8 %bb.15, implicit-def $ctr8, implicit $ctr8
+  ; CHECK-NEXT:   B %bb.6
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.6.for.body:
+  ; CHECK-NEXT:   successors: %bb.7(0x40000000), %bb.14(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[XSADDSP1:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[LFSU2]], [[LFSU2]]
+  ; CHECK-NEXT:   [[LFSU6:%[0-9]+]]:f4rc, [[LFSU7:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY5]] :: (load unknown-size from %ir.3, align 4)
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU7]]
+  ; CHECK-NEXT:   BDZ8 %bb.14, implicit-def $ctr8, implicit $ctr8
+  ; CHECK-NEXT:   B %bb.7
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.7.for.body:
+  ; CHECK-NEXT:   successors: %bb.8(0x40000000), %bb.13(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[XSADDSP2:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[LFSU]], [[XSADDSP]]
+  ; CHECK-NEXT:   [[XSADDSP3:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[LFSU4]], [[LFSU4]]
+  ; CHECK-NEXT:   [[LFSU8:%[0-9]+]]:f4rc, [[LFSU9:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY6]] :: (load unknown-size from %ir.3, align 4)
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU9]]
+  ; CHECK-NEXT:   BDZ8 %bb.13, implicit-def $ctr8, implicit $ctr8
+  ; CHECK-NEXT:   B %bb.8
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.8.for.body:
+  ; CHECK-NEXT:   successors: %bb.9(0x40000000), %bb.12(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[XSADDSP4:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[LFSU2]], [[XSADDSP1]]
+  ; CHECK-NEXT:   [[XSADDSP5:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[LFSU6]], [[LFSU6]]
+  ; CHECK-NEXT:   [[LFSU10:%[0-9]+]]:f4rc, [[LFSU11:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY7]] :: (load unknown-size from %ir.3, align 4)
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU11]]
+  ; CHECK-NEXT:   BDZ8 %bb.12, implicit-def $ctr8, implicit $ctr8
+  ; CHECK-NEXT:   B %bb.9
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.9.for.body:
+  ; CHECK-NEXT:   successors: %bb.10(0x80000000), %bb.11(0x00000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[XSADDSP6:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[LFSU4]], [[XSADDSP3]]
+  ; CHECK-NEXT:   [[XSADDSP7:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[LFSU8]], [[LFSU8]]
+  ; CHECK-NEXT:   [[LFSU12:%[0-9]+]]:f4rc, [[LFSU13:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY8]] :: (load unknown-size from %ir.3, align 4)
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:g8rc = COPY [[LFSU13]]
+  ; CHECK-NEXT:   BDZ8 %bb.11, implicit-def $ctr8, implicit $ctr8
+  ; CHECK-NEXT:   B %bb.10
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.10.for.body:
+  ; CHECK-NEXT:   successors: %bb.10(0x7c000000), %bb.11(0x04000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[COPY9]], %bb.9, %50, %bb.10
+  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.9, %47, %bb.10
+  ; CHECK-NEXT:   [[PHI2:%[0-9]+]]:f4rc = PHI [[LFSU12]], %bb.9, %45, %bb.10
+  ; CHECK-NEXT:   [[PHI3:%[0-9]+]]:f4rc = PHI [[LFSU10]], %bb.9, [[PHI2]], %bb.10
+  ; CHECK-NEXT:   [[PHI4:%[0-9]+]]:f4rc = PHI [[LFSU8]], %bb.9, [[PHI3]], %bb.10
+  ; CHECK-NEXT:   [[PHI5:%[0-9]+]]:f4rc = PHI [[LFSU6]], %bb.9, [[PHI4]], %bb.10
+  ; CHECK-NEXT:   [[PHI6:%[0-9]+]]:vssrc = PHI [[XSADDSP7]], %bb.9, %48, %bb.10
+  ; CHECK-NEXT:   [[PHI7:%[0-9]+]]:vssrc = PHI [[XSADDSP5]], %bb.9, [[PHI6]], %bb.10
+  ; CHECK-NEXT:   [[PHI8:%[0-9]+]]:f4rc = PHI [[XSADDSP6]], %bb.9, %49, %bb.10
+  ; CHECK-NEXT:   [[PHI9:%[0-9]+]]:f4rc = PHI [[XSADDSP4]], %bb.9, [[PHI8]], %bb.10
+  ; CHECK-NEXT:   [[PHI10:%[0-9]+]]:f4rc = PHI [[XSADDSP2]], %bb.9, [[PHI9]], %bb.10
+  ; CHECK-NEXT:   [[STFSU:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[PHI10]], 4, [[PHI1]] :: (store (s32) into %ir.2)
+  ; CHECK-NEXT:   [[LFSU14:%[0-9]+]]:f4rc, [[LFSU15:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[PHI]] :: (load unknown-size from %ir.3, align 4)
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:g8rc = COPY [[STFSU]]
+  ; CHECK-NEXT:   [[XSADDSP8:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[PHI3]], [[PHI3]]
+  ; CHECK-NEXT:   [[XSADDSP9:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[PHI5]], [[PHI7]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:g8rc = COPY [[LFSU15]]
+  ; CHECK-NEXT:   BDNZ8 %bb.10, implicit-def $ctr8, implicit $ctr8
+  ; CHECK-NEXT:   B %bb.11
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.11:
+  ; CHECK-NEXT:   successors: %bb.12(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI11:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.9, [[COPY10]], %bb.10
+  ; CHECK-NEXT:   [[PHI12:%[0-9]+]]:f4rc = PHI [[LFSU12]], %bb.9, [[LFSU14]], %bb.10
+  ; CHECK-NEXT:   [[PHI13:%[0-9]+]]:f4rc = PHI [[LFSU10]], %bb.9, [[PHI2]], %bb.10
+  ; CHECK-NEXT:   [[PHI14:%[0-9]+]]:f4rc = PHI [[LFSU8]], %bb.9, [[PHI3]], %bb.10
+  ; CHECK-NEXT:   [[PHI15:%[0-9]+]]:f4rc = PHI [[LFSU6]], %bb.9, [[PHI4]], %bb.10
+  ; CHECK-NEXT:   [[PHI16:%[0-9]+]]:vssrc = PHI [[XSADDSP7]], %bb.9, [[XSADDSP8]], %bb.10
+  ; CHECK-NEXT:   [[PHI17:%[0-9]+]]:vssrc = PHI [[XSADDSP5]], %bb.9, [[PHI6]], %bb.10
+  ; CHECK-NEXT:   [[PHI18:%[0-9]+]]:f4rc = PHI [[XSADDSP6]], %bb.9, [[XSADDSP9]], %bb.10
+  ; CHECK-NEXT:   [[PHI19:%[0-9]+]]:f4rc = PHI [[XSADDSP4]], %bb.9, [[PHI8]], %bb.10
+  ; CHECK-NEXT:   [[PHI20:%[0-9]+]]:f4rc = PHI [[XSADDSP2]], %bb.9, [[PHI9]], %bb.10
+  ; CHECK-NEXT:   [[STFSU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[PHI20]], 4, [[PHI11]] :: (store unknown-size into %ir.2, align 4)
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:g8rc = COPY [[STFSU1]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.12:
+  ; CHECK-NEXT:   successors: %bb.13(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI21:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.8, [[COPY12]], %bb.11
+  ; CHECK-NEXT:   [[PHI22:%[0-9]+]]:f4rc = PHI [[LFSU10]], %bb.8, [[PHI12]], %bb.11
+  ; CHECK-NEXT:   [[PHI23:%[0-9]+]]:f4rc = PHI [[LFSU8]], %bb.8, [[PHI13]], %bb.11
+  ; CHECK-NEXT:   [[PHI24:%[0-9]+]]:f4rc = PHI [[LFSU6]], %bb.8, [[PHI14]], %bb.11
+  ; CHECK-NEXT:   [[PHI25:%[0-9]+]]:f4rc = PHI [[LFSU4]], %bb.8, [[PHI15]], %bb.11
+  ; CHECK-NEXT:   [[PHI26:%[0-9]+]]:vssrc = PHI [[XSADDSP5]], %bb.8, [[PHI16]], %bb.11
+  ; CHECK-NEXT:   [[PHI27:%[0-9]+]]:vssrc = PHI [[XSADDSP3]], %bb.8, [[PHI17]], %bb.11
+  ; CHECK-NEXT:   [[PHI28:%[0-9]+]]:f4rc = PHI [[XSADDSP4]], %bb.8, [[PHI18]], %bb.11
+  ; CHECK-NEXT:   [[PHI29:%[0-9]+]]:f4rc = PHI [[XSADDSP2]], %bb.8, [[PHI19]], %bb.11
+  ; CHECK-NEXT:   [[STFSU2:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[PHI29]], 4, [[PHI21]] :: (store unknown-size into %ir.2, align 4)
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:g8rc = COPY [[STFSU2]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.13:
+  ; CHECK-NEXT:   successors: %bb.14(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI30:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.7, [[COPY13]], %bb.12
+  ; CHECK-NEXT:   [[PHI31:%[0-9]+]]:f4rc = PHI [[LFSU8]], %bb.7, [[PHI22]], %bb.12
+  ; CHECK-NEXT:   [[PHI32:%[0-9]+]]:f4rc = PHI [[LFSU6]], %bb.7, [[PHI23]], %bb.12
+  ; CHECK-NEXT:   [[PHI33:%[0-9]+]]:f4rc = PHI [[LFSU4]], %bb.7, [[PHI24]], %bb.12
+  ; CHECK-NEXT:   [[PHI34:%[0-9]+]]:f4rc = PHI [[LFSU2]], %bb.7, [[PHI25]], %bb.12
+  ; CHECK-NEXT:   [[PHI35:%[0-9]+]]:vssrc = PHI [[XSADDSP3]], %bb.7, [[PHI26]], %bb.12
+  ; CHECK-NEXT:   [[PHI36:%[0-9]+]]:vssrc = PHI [[XSADDSP1]], %bb.7, [[PHI27]], %bb.12
+  ; CHECK-NEXT:   [[PHI37:%[0-9]+]]:f4rc = PHI [[XSADDSP2]], %bb.7, [[PHI28]], %bb.12
+  ; CHECK-NEXT:   [[STFSU3:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[PHI37]], 4, [[PHI30]] :: (store unknown-size into %ir.2, align 4)
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:g8rc = COPY [[STFSU3]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.14:
+  ; CHECK-NEXT:   successors: %bb.15(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI38:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.6, [[COPY14]], %bb.13
+  ; CHECK-NEXT:   [[PHI39:%[0-9]+]]:f4rc = PHI [[LFSU6]], %bb.6, [[PHI31]], %bb.13
+  ; CHECK-NEXT:   [[PHI40:%[0-9]+]]:f4rc = PHI [[LFSU4]], %bb.6, [[PHI32]], %bb.13
+  ; CHECK-NEXT:   [[PHI41:%[0-9]+]]:f4rc = PHI [[LFSU2]], %bb.6, [[PHI33]], %bb.13
+  ; CHECK-NEXT:   [[PHI42:%[0-9]+]]:f4rc = PHI [[LFSU]], %bb.6, [[PHI34]], %bb.13
+  ; CHECK-NEXT:   [[PHI43:%[0-9]+]]:vssrc = PHI [[XSADDSP1]], %bb.6, [[PHI35]], %bb.13
+  ; CHECK-NEXT:   [[PHI44:%[0-9]+]]:vssrc = PHI [[XSADDSP]], %bb.6, [[PHI36]], %bb.13
+  ; CHECK-NEXT:   [[XSADDSP10:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[PHI42]], [[PHI44]]
+  ; CHECK-NEXT:   [[STFSU4:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[XSADDSP10]], 4, [[PHI38]] :: (store unknown-size into %ir.2, align 4)
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:g8rc = COPY [[STFSU4]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.15:
+  ; CHECK-NEXT:   successors: %bb.16(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI45:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.5, [[COPY15]], %bb.14
+  ; CHECK-NEXT:   [[PHI46:%[0-9]+]]:f4rc = PHI [[LFSU4]], %bb.5, [[PHI39]], %bb.14
+  ; CHECK-NEXT:   [[PHI47:%[0-9]+]]:f4rc = PHI [[LFSU2]], %bb.5, [[PHI40]], %bb.14
+  ; CHECK-NEXT:   [[PHI48:%[0-9]+]]:f4rc = PHI [[LFSU]], %bb.5, [[PHI41]], %bb.14
+  ; CHECK-NEXT:   [[PHI49:%[0-9]+]]:vssrc = PHI [[XSADDSP]], %bb.5, [[PHI43]], %bb.14
+  ; CHECK-NEXT:   [[XSADDSP11:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[PHI48]], [[PHI49]]
+  ; CHECK-NEXT:   [[STFSU5:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[XSADDSP11]], 4, [[PHI45]] :: (store unknown-size into %ir.2, align 4)
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:g8rc = COPY [[STFSU5]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.16:
+  ; CHECK-NEXT:   successors: %bb.17(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI50:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.4, [[COPY16]], %bb.15
+  ; CHECK-NEXT:   [[PHI51:%[0-9]+]]:f4rc = PHI [[LFSU2]], %bb.4, [[PHI46]], %bb.15
+  ; CHECK-NEXT:   [[PHI52:%[0-9]+]]:f4rc = PHI [[LFSU]], %bb.4, [[PHI47]], %bb.15
+  ; CHECK-NEXT:   [[XSADDSP12:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[PHI52]], [[PHI52]]
+  ; CHECK-NEXT:   [[XSADDSP13:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[PHI52]], [[XSADDSP12]]
+  ; CHECK-NEXT:   [[STFSU6:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[XSADDSP13]], 4, [[PHI50]] :: (store unknown-size into %ir.2, align 4)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:g8rc = COPY [[STFSU6]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.17:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI53:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.3, [[COPY17]], %bb.16
+  ; CHECK-NEXT:   [[PHI54:%[0-9]+]]:f4rc = PHI [[LFSU]], %bb.3, [[PHI51]], %bb.16
+  ; CHECK-NEXT:   [[XSADDSP14:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[PHI54]], [[PHI54]]
+  ; CHECK-NEXT:   [[XSADDSP15:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[PHI54]], [[XSADDSP14]]
+  ; CHECK-NEXT:   [[STFSU7:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[XSADDSP15]], 4, [[PHI53]] :: (store unknown-size into %ir.2, align 4)
+  ; CHECK-NEXT:   B %bb.1
+  bb.0.entry:
+    liveins: $x3, $x4, $x5
+
+    %8:g8rc = COPY $x5
+    %7:g8rc_and_g8rc_nox0 = COPY $x4
+    %6:g8rc_and_g8rc_nox0 = COPY $x3
+    %9:g8rc = RLDICL %8, 0, 32
+    %0:g8rc = ADDI8 %7, -4
+    %1:g8rc = ADDI8 %6, -4
+    MTCTR8loop killed %9, implicit-def dead $ctr8
+    B %bb.2
+
+  bb.1.for.cond.cleanup:
+    BLR8 implicit $lr8, implicit $rm
+
+  bb.2.for.body:
+    successors: %bb.2(0x7c000000), %bb.1(0x04000000)
+
+    %2:g8rc_and_g8rc_nox0 = PHI %0, %bb.0, %5, %bb.2
+    %3:g8rc_and_g8rc_nox0 = PHI %1, %bb.0, %4, %bb.2
+    %10:f4rc, %11:g8rc_and_g8rc_nox0 = LFSU 4, %2 :: (load (s32) from %ir.3)
+    %12:vssrc = nofpexcept XSADDSP %10, %10
+    %13:f4rc = nofpexcept XSADDSP %10, killed %12
+    %14:g8rc_and_g8rc_nox0 = STFSU killed %13, 4, %3 :: (store (s32) into %ir.2)
+    %4:g8rc = COPY %14
+    %5:g8rc = COPY %11
+    BDNZ8 %bb.2, implicit-def dead $ctr8, implicit $ctr8
+    B %bb.1
+
+...


        


More information about the llvm-commits mailing list