[llvm] c3f9697 - [PowerPC] Fix wrong codegen when stack pointer has to realign performing dynalloc

Kai Luo via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 21 23:35:42 PDT 2020


Author: Kai Luo
Date: 2020-07-22T06:35:12Z
New Revision: c3f9697f1f227296818fbaf1a770a29842ea454c

URL: https://github.com/llvm/llvm-project/commit/c3f9697f1f227296818fbaf1a770a29842ea454c
DIFF: https://github.com/llvm/llvm-project/commit/c3f9697f1f227296818fbaf1a770a29842ea454c.diff

LOG: [PowerPC] Fix wrong codegen when stack pointer has to realign performing dynalloc

The current PowerPC backend generates a wrong code sequence when the
stack pointer has to be realigned and `-fstack-clash-protection` is
enabled. When probing a dynamic stack allocation, the current
`PREPARE_PROBED_ALLOCA` takes `NegSizeReg` as input and returns
`FinalStackPtr`. `FinalStackPtr=StackPtr+ActualNegSize` is calculated
correctly; however, the code following `PREPARE_PROBED_ALLOCA` still
uses the value of `NegSizeReg`, which does not contain `ActualNegSize`
if `MaxAlign > TargetAlign`, to calculate the loop trip count and the
residual number of bytes.

This patch is part of the fix for
https://bugs.llvm.org/show_bug.cgi?id=46759.

Differential Revision: https://reviews.llvm.org/D84152

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstr64Bit.td
    llvm/lib/Target/PowerPC/PPCInstrInfo.td
    llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
    llvm/test/CodeGen/PowerPC/pr46759.ll
    llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index f8d7ab87f35c..fe9ab604ec2f 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11954,18 +11954,34 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
   Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
   Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
-
-  // Get the canonical FinalStackPtr like what
-  // PPCRegisterInfo::lowerDynamicAlloc does.
-  BuildMI(*MBB, {MI}, DL,
-          TII->get(isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64
-                           : PPC::PREPARE_PROBED_ALLOCA_32),
-          FramePointer)
-      .addDef(FinalStackPtr)
+  Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
+
+  // Since value of NegSizeReg might be realigned in prologepilog, insert a
+  // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
+  // NegSize.
+  unsigned ProbeOpc;
+  if (!MRI.hasOneNonDBGUse(NegSizeReg))
+    ProbeOpc =
+        isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
+  else
+    // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
+    // and NegSizeReg will be allocated in the same phyreg to avoid
+    // redundant copy when NegSizeReg has only one use which is current MI and
+    // will be replaced by PREPARE_PROBED_ALLOCA then.
+    ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
+                       : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
+  BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
+      .addDef(ActualNegSizeReg)
       .addReg(NegSizeReg)
       .add(MI.getOperand(2))
       .add(MI.getOperand(3));
 
+  // Calculate final stack pointer, which equals to SP + ActualNegSize.
+  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
+          FinalStackPtr)
+      .addReg(SPReg)
+      .addReg(ActualNegSizeReg);
+
   // Materialize a scratch register for update.
   int64_t NegProbeSize = -(int64_t)ProbeSize;
   assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
@@ -11986,7 +12002,7 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
     // Probing leading residual part.
     Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
-        .addReg(NegSizeReg)
+        .addReg(ActualNegSizeReg)
         .addReg(ScratchReg);
     Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
@@ -11995,7 +12011,7 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
     Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
         .addReg(Mul)
-        .addReg(NegSizeReg);
+        .addReg(ActualNegSizeReg);
     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
         .addReg(FramePointer)
         .addReg(SPReg)

diff  --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 1c457d4170d5..6956c40a70be 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -431,9 +431,14 @@ def PROBED_ALLOCA_64 : PPCCustomInserterPseudo<(outs g8rc:$result),
                          (ins g8rc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_64",
                            [(set i64:$result,
                              (PPCprobedalloca i64:$negsize, iaddr:$fpsi))]>;
-def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs g8rc:$fp,
-    g8rc:$sp),
+def PREPARE_PROBED_ALLOCA_64 : PPCEmitTimePseudo<(outs
+    g8rc:$fp, g8rc:$actual_negsize),
     (ins g8rc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_64", []>;
+def PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 : PPCEmitTimePseudo<(outs
+    g8rc:$fp, g8rc:$actual_negsize),
+    (ins g8rc:$negsize, memri:$fpsi),
+    "#PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64", []>,
+    RegConstraint<"$actual_negsize = $negsize">;
 def PROBED_STACKALLOC_64 : PPCEmitTimePseudo<(outs g8rc:$scratch, g8rc:$temp),
     (ins i64imm:$stacksize),
     "#PROBED_STACKALLOC_64", []>;

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index c49e7a3dc6c2..c565758973bf 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1406,9 +1406,14 @@ def PROBED_ALLOCA_32 : PPCCustomInserterPseudo<(outs gprc:$result),
                          (ins gprc:$negsize, memri:$fpsi), "#PROBED_ALLOCA_32",
                            [(set i32:$result,
                              (PPCprobedalloca i32:$negsize, iaddr:$fpsi))]>;
-def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs gprc:$fp,
-    gprc:$sp),
+def PREPARE_PROBED_ALLOCA_32 : PPCEmitTimePseudo<(outs
+    gprc:$fp, gprc:$actual_negsize),
     (ins gprc:$negsize, memri:$fpsi), "#PREPARE_PROBED_ALLOCA_32", []>;
+def PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32 : PPCEmitTimePseudo<(outs
+    gprc:$fp, gprc:$actual_negsize),
+    (ins gprc:$negsize, memri:$fpsi),
+    "#PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32", []>,
+    RegConstraint<"$actual_negsize = $negsize">;
 def PROBED_STACKALLOC_32 : PPCEmitTimePseudo<(outs gprc:$scratch, gprc:$temp),
     (ins i64imm:$stacksize),
     "#PROBED_STACKALLOC_32", []>;

diff  --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 35f5e1fbebcd..ed8948a63972 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -624,21 +624,30 @@ void PPCRegisterInfo::lowerPrepareProbedAlloca(
   bool LP64 = TM.isPPC64();
   DebugLoc dl = MI.getDebugLoc();
   Register FramePointer = MI.getOperand(0).getReg();
-  Register FinalStackPtr = MI.getOperand(1).getReg();
+  const Register ActualNegSizeReg = MI.getOperand(1).getReg();
   bool KillNegSizeReg = MI.getOperand(2).isKill();
   Register NegSizeReg = MI.getOperand(2).getReg();
-  prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, FramePointer);
-  if (LP64) {
-    BuildMI(MBB, II, dl, TII.get(PPC::ADD8), FinalStackPtr)
-        .addReg(PPC::X1)
-        .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
-
-  } else {
-    BuildMI(MBB, II, dl, TII.get(PPC::ADD4), FinalStackPtr)
-        .addReg(PPC::R1)
-        .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
+  const MCInstrDesc &CopyInst = TII.get(LP64 ? PPC::OR8 : PPC::OR);
+  // RegAllocator might allocate FramePointer and NegSizeReg in the same phyreg.
+  if (FramePointer == NegSizeReg) {
+    assert(KillNegSizeReg && "FramePointer is a def and NegSizeReg is an use, "
+                             "NegSizeReg should be killed");
+    // FramePointer is clobbered earlier than the use of NegSizeReg in
+    // prepareDynamicAlloca, save NegSizeReg in ActualNegSizeReg to avoid
+    // misuse.
+    BuildMI(MBB, II, dl, CopyInst, ActualNegSizeReg)
+        .addReg(NegSizeReg)
+        .addReg(NegSizeReg);
+    NegSizeReg = ActualNegSizeReg;
+    KillNegSizeReg = false;
   }
-
+  prepareDynamicAlloca(II, NegSizeReg, KillNegSizeReg, FramePointer);
+  // NegSizeReg might be updated in prepareDynamicAlloca if MaxAlign >
+  // TargetAlign.
+  if (NegSizeReg != ActualNegSizeReg)
+    BuildMI(MBB, II, dl, CopyInst, ActualNegSizeReg)
+        .addReg(NegSizeReg)
+        .addReg(NegSizeReg);
   MBB.erase(II);
 }
 
@@ -1084,7 +1093,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   if (FPSI && FrameIndex == FPSI &&
       (OpC == PPC::PREPARE_PROBED_ALLOCA_64 ||
-       OpC == PPC::PREPARE_PROBED_ALLOCA_32)) {
+       OpC == PPC::PREPARE_PROBED_ALLOCA_32 ||
+       OpC == PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 ||
+       OpC == PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32)) {
     lowerPrepareProbedAlloca(II);
     return;
   }

diff  --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll
index 4d3e8cadc21e..d1d68a5db7e3 100644
--- a/llvm/test/CodeGen/PowerPC/pr46759.ll
+++ b/llvm/test/CodeGen/PowerPC/pr46759.ll
@@ -20,26 +20,27 @@ define void @foo(i32 %vla_size) #0 {
 ; CHECK-LE-NEXT:    .cfi_offset r31, -8
 ; CHECK-LE-NEXT:    .cfi_offset r30, -16
 ; CHECK-LE-NEXT:    clrldi r3, r3, 32
-; CHECK-LE-NEXT:    li r6, -4096
-; CHECK-LE-NEXT:    ld r4, 0(r1)
+; CHECK-LE-NEXT:    li r5, -2048
 ; CHECK-LE-NEXT:    mr r31, r1
 ; CHECK-LE-NEXT:    addi r3, r3, 15
 ; CHECK-LE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-LE-NEXT:    rldicl r3, r3, 4, 31
-; CHECK-LE-NEXT:    neg r5, r3
-; CHECK-LE-NEXT:    li r3, -2048
-; CHECK-LE-NEXT:    divd r7, r5, r6
-; CHECK-LE-NEXT:    and r3, r5, r3
-; CHECK-LE-NEXT:    add r3, r1, r3
-; CHECK-LE-NEXT:    mulld r6, r7, r6
-; CHECK-LE-NEXT:    sub r5, r5, r6
-; CHECK-LE-NEXT:    stdux r4, r1, r5
-; CHECK-LE-NEXT:    cmpd r1, r3
+; CHECK-LE-NEXT:    neg r4, r3
+; CHECK-LE-NEXT:    ld r3, 0(r1)
+; CHECK-LE-NEXT:    and r5, r4, r5
+; CHECK-LE-NEXT:    mr r4, r5
+; CHECK-LE-NEXT:    li r5, -4096
+; CHECK-LE-NEXT:    divd r6, r4, r5
+; CHECK-LE-NEXT:    mulld r5, r6, r5
+; CHECK-LE-NEXT:    sub r5, r4, r5
+; CHECK-LE-NEXT:    add r4, r1, r4
+; CHECK-LE-NEXT:    stdux r3, r1, r5
+; CHECK-LE-NEXT:    cmpd r1, r4
 ; CHECK-LE-NEXT:    beq cr0, .LBB0_2
 ; CHECK-LE-NEXT:  .LBB0_1: # %entry
 ; CHECK-LE-NEXT:    #
-; CHECK-LE-NEXT:    stdu r4, -4096(r1)
-; CHECK-LE-NEXT:    cmpd r1, r3
+; CHECK-LE-NEXT:    stdu r3, -4096(r1)
+; CHECK-LE-NEXT:    cmpd r1, r4
 ; CHECK-LE-NEXT:    bne cr0, .LBB0_1
 ; CHECK-LE-NEXT:  .LBB0_2: # %entry
 ; CHECK-LE-NEXT:    addi r3, r1, 2048

diff  --git a/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll
index b475a2f7fbf1..eef02e77c2b1 100644
--- a/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll
@@ -18,23 +18,23 @@ define i32 @foo(i32 %n) local_unnamed_addr #0 "stack-probe-size"="32768" nounwin
 ; CHECK-LE-NEXT:    std r31, -8(r1)
 ; CHECK-LE-NEXT:    stdu r1, -48(r1)
 ; CHECK-LE-NEXT:    rldic r3, r3, 2, 30
-; CHECK-LE-NEXT:    li r6, -32768
+; CHECK-LE-NEXT:    li r5, -32768
 ; CHECK-LE-NEXT:    mr r31, r1
 ; CHECK-LE-NEXT:    addi r3, r3, 15
-; CHECK-LE-NEXT:    addi r4, r31, 48
 ; CHECK-LE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-LE-NEXT:    rldicl r3, r3, 4, 29
-; CHECK-LE-NEXT:    neg r5, r3
-; CHECK-LE-NEXT:    divd r7, r5, r6
-; CHECK-LE-NEXT:    add r3, r1, r5
-; CHECK-LE-NEXT:    mulld r6, r7, r6
-; CHECK-LE-NEXT:    sub r5, r5, r6
-; CHECK-LE-NEXT:    stdux r4, r1, r5
-; CHECK-LE-NEXT:    cmpd r1, r3
+; CHECK-LE-NEXT:    neg r4, r3
+; CHECK-LE-NEXT:    addi r3, r31, 48
+; CHECK-LE-NEXT:    divd r6, r4, r5
+; CHECK-LE-NEXT:    mulld r5, r6, r5
+; CHECK-LE-NEXT:    sub r5, r4, r5
+; CHECK-LE-NEXT:    add r4, r1, r4
+; CHECK-LE-NEXT:    stdux r3, r1, r5
+; CHECK-LE-NEXT:    cmpd r1, r4
 ; CHECK-LE-NEXT:    beq cr0, .LBB0_2
 ; CHECK-LE-NEXT:  .LBB0_1:
-; CHECK-LE-NEXT:    stdu r4, -32768(r1)
-; CHECK-LE-NEXT:    cmpd r1, r3
+; CHECK-LE-NEXT:    stdu r3, -32768(r1)
+; CHECK-LE-NEXT:    cmpd r1, r4
 ; CHECK-LE-NEXT:    bne cr0, .LBB0_1
 ; CHECK-LE-NEXT:  .LBB0_2:
 ; CHECK-LE-NEXT:    li r4, 1
@@ -53,20 +53,20 @@ define i32 @foo(i32 %n) local_unnamed_addr #0 "stack-probe-size"="32768" nounwin
 ; CHECK-P9-LE-NEXT:    addi r3, r3, 15
 ; CHECK-P9-LE-NEXT:    li r6, -32768
 ; CHECK-P9-LE-NEXT:    mr r31, r1
-; CHECK-P9-LE-NEXT:    addi r4, r31, 48
 ; CHECK-P9-LE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-P9-LE-NEXT:    rldicl r3, r3, 4, 29
 ; CHECK-P9-LE-NEXT:    neg r5, r3
+; CHECK-P9-LE-NEXT:    addi r3, r31, 48
 ; CHECK-P9-LE-NEXT:    divd r7, r5, r6
-; CHECK-P9-LE-NEXT:    add r3, r1, r5
+; CHECK-P9-LE-NEXT:    add r4, r1, r5
 ; CHECK-P9-LE-NEXT:    mulld r6, r7, r6
 ; CHECK-P9-LE-NEXT:    sub r5, r5, r6
-; CHECK-P9-LE-NEXT:    stdux r4, r1, r5
-; CHECK-P9-LE-NEXT:    cmpd r1, r3
+; CHECK-P9-LE-NEXT:    stdux r3, r1, r5
+; CHECK-P9-LE-NEXT:    cmpd r1, r4
 ; CHECK-P9-LE-NEXT:    beq cr0, .LBB0_2
 ; CHECK-P9-LE-NEXT:  .LBB0_1:
-; CHECK-P9-LE-NEXT:    stdu r4, -32768(r1)
-; CHECK-P9-LE-NEXT:    cmpd r1, r3
+; CHECK-P9-LE-NEXT:    stdu r3, -32768(r1)
+; CHECK-P9-LE-NEXT:    cmpd r1, r4
 ; CHECK-P9-LE-NEXT:    bne cr0, .LBB0_1
 ; CHECK-P9-LE-NEXT:  .LBB0_2:
 ; CHECK-P9-LE-NEXT:    li r4, 1
@@ -82,23 +82,23 @@ define i32 @foo(i32 %n) local_unnamed_addr #0 "stack-probe-size"="32768" nounwin
 ; CHECK-BE-NEXT:    std r31, -8(r1)
 ; CHECK-BE-NEXT:    stdu r1, -64(r1)
 ; CHECK-BE-NEXT:    rldic r3, r3, 2, 30
-; CHECK-BE-NEXT:    li r6, -32768
+; CHECK-BE-NEXT:    li r5, -32768
 ; CHECK-BE-NEXT:    addi r3, r3, 15
 ; CHECK-BE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-BE-NEXT:    mr r31, r1
 ; CHECK-BE-NEXT:    rldicl r3, r3, 4, 29
-; CHECK-BE-NEXT:    addi r4, r31, 64
-; CHECK-BE-NEXT:    neg r5, r3
-; CHECK-BE-NEXT:    divd r7, r5, r6
-; CHECK-BE-NEXT:    add r3, r1, r5
-; CHECK-BE-NEXT:    mulld r6, r7, r6
-; CHECK-BE-NEXT:    sub r5, r5, r6
-; CHECK-BE-NEXT:    stdux r4, r1, r5
-; CHECK-BE-NEXT:    cmpd r1, r3
+; CHECK-BE-NEXT:    neg r4, r3
+; CHECK-BE-NEXT:    divd r6, r4, r5
+; CHECK-BE-NEXT:    addi r3, r31, 64
+; CHECK-BE-NEXT:    mulld r5, r6, r5
+; CHECK-BE-NEXT:    sub r5, r4, r5
+; CHECK-BE-NEXT:    add r4, r1, r4
+; CHECK-BE-NEXT:    stdux r3, r1, r5
+; CHECK-BE-NEXT:    cmpd r1, r4
 ; CHECK-BE-NEXT:    beq cr0, .LBB0_2
 ; CHECK-BE-NEXT:  .LBB0_1:
-; CHECK-BE-NEXT:    stdu r4, -32768(r1)
-; CHECK-BE-NEXT:    cmpd r1, r3
+; CHECK-BE-NEXT:    stdu r3, -32768(r1)
+; CHECK-BE-NEXT:    cmpd r1, r4
 ; CHECK-BE-NEXT:    bne cr0, .LBB0_1
 ; CHECK-BE-NEXT:  .LBB0_2:
 ; CHECK-BE-NEXT:    li r4, 1
@@ -115,21 +115,21 @@ define i32 @foo(i32 %n) local_unnamed_addr #0 "stack-probe-size"="32768" nounwin
 ; CHECK-32-NEXT:    slwi r3, r3, 2
 ; CHECK-32-NEXT:    addi r3, r3, 15
 ; CHECK-32-NEXT:    rlwinm r3, r3, 0, 0, 27
-; CHECK-32-NEXT:    neg r5, r3
-; CHECK-32-NEXT:    li r6, -32768
-; CHECK-32-NEXT:    divw r7, r5, r6
+; CHECK-32-NEXT:    neg r4, r3
+; CHECK-32-NEXT:    li r5, -32768
+; CHECK-32-NEXT:    divw r6, r4, r5
 ; CHECK-32-NEXT:    stw r31, 28(r1)
 ; CHECK-32-NEXT:    mr r31, r1
-; CHECK-32-NEXT:    addi r4, r31, 32
-; CHECK-32-NEXT:    add r3, r1, r5
-; CHECK-32-NEXT:    mullw r6, r7, r6
-; CHECK-32-NEXT:    sub r5, r5, r6
-; CHECK-32-NEXT:    stwux r4, r1, r5
-; CHECK-32-NEXT:    cmpw r1, r3
+; CHECK-32-NEXT:    addi r3, r31, 32
+; CHECK-32-NEXT:    mullw r5, r6, r5
+; CHECK-32-NEXT:    sub r5, r4, r5
+; CHECK-32-NEXT:    add r4, r1, r4
+; CHECK-32-NEXT:    stwux r3, r1, r5
+; CHECK-32-NEXT:    cmpw r1, r4
 ; CHECK-32-NEXT:    beq cr0, .LBB0_2
 ; CHECK-32-NEXT:  .LBB0_1:
-; CHECK-32-NEXT:    stwu r4, -32768(r1)
-; CHECK-32-NEXT:    cmpw r1, r3
+; CHECK-32-NEXT:    stwu r3, -32768(r1)
+; CHECK-32-NEXT:    cmpw r1, r4
 ; CHECK-32-NEXT:    bne cr0, .LBB0_1
 ; CHECK-32-NEXT:  .LBB0_2:
 ; CHECK-32-NEXT:    li r4, 1
@@ -154,23 +154,23 @@ define i32 @bar(i32 %n) local_unnamed_addr #0 nounwind {
 ; CHECK-LE-NEXT:    std r31, -8(r1)
 ; CHECK-LE-NEXT:    stdu r1, -48(r1)
 ; CHECK-LE-NEXT:    rldic r4, r3, 2, 30
-; CHECK-LE-NEXT:    li r7, -4096
+; CHECK-LE-NEXT:    li r6, -4096
 ; CHECK-LE-NEXT:    mr r31, r1
 ; CHECK-LE-NEXT:    addi r4, r4, 15
-; CHECK-LE-NEXT:    addi r5, r31, 48
 ; CHECK-LE-NEXT:    rldicl r4, r4, 60, 4
 ; CHECK-LE-NEXT:    rldicl r4, r4, 4, 29
-; CHECK-LE-NEXT:    neg r6, r4
-; CHECK-LE-NEXT:    divd r8, r6, r7
-; CHECK-LE-NEXT:    add r4, r1, r6
-; CHECK-LE-NEXT:    mulld r7, r8, r7
-; CHECK-LE-NEXT:    sub r6, r6, r7
-; CHECK-LE-NEXT:    stdux r5, r1, r6
-; CHECK-LE-NEXT:    cmpd r1, r4
+; CHECK-LE-NEXT:    neg r5, r4
+; CHECK-LE-NEXT:    addi r4, r31, 48
+; CHECK-LE-NEXT:    divd r7, r5, r6
+; CHECK-LE-NEXT:    mulld r6, r7, r6
+; CHECK-LE-NEXT:    sub r6, r5, r6
+; CHECK-LE-NEXT:    add r5, r1, r5
+; CHECK-LE-NEXT:    stdux r4, r1, r6
+; CHECK-LE-NEXT:    cmpd r1, r5
 ; CHECK-LE-NEXT:    beq cr0, .LBB1_2
 ; CHECK-LE-NEXT:  .LBB1_1:
-; CHECK-LE-NEXT:    stdu r5, -4096(r1)
-; CHECK-LE-NEXT:    cmpd r1, r4
+; CHECK-LE-NEXT:    stdu r4, -4096(r1)
+; CHECK-LE-NEXT:    cmpd r1, r5
 ; CHECK-LE-NEXT:    bne cr0, .LBB1_1
 ; CHECK-LE-NEXT:  .LBB1_2:
 ; CHECK-LE-NEXT:    extsw r3, r3
@@ -192,20 +192,20 @@ define i32 @bar(i32 %n) local_unnamed_addr #0 nounwind {
 ; CHECK-P9-LE-NEXT:    addi r4, r4, 15
 ; CHECK-P9-LE-NEXT:    li r7, -4096
 ; CHECK-P9-LE-NEXT:    mr r31, r1
-; CHECK-P9-LE-NEXT:    addi r5, r31, 48
 ; CHECK-P9-LE-NEXT:    rldicl r4, r4, 60, 4
 ; CHECK-P9-LE-NEXT:    rldicl r4, r4, 4, 29
 ; CHECK-P9-LE-NEXT:    neg r6, r4
+; CHECK-P9-LE-NEXT:    addi r4, r31, 48
 ; CHECK-P9-LE-NEXT:    divd r8, r6, r7
-; CHECK-P9-LE-NEXT:    add r4, r1, r6
+; CHECK-P9-LE-NEXT:    add r5, r1, r6
 ; CHECK-P9-LE-NEXT:    mulld r7, r8, r7
 ; CHECK-P9-LE-NEXT:    sub r6, r6, r7
-; CHECK-P9-LE-NEXT:    stdux r5, r1, r6
-; CHECK-P9-LE-NEXT:    cmpd r1, r4
+; CHECK-P9-LE-NEXT:    stdux r4, r1, r6
+; CHECK-P9-LE-NEXT:    cmpd r1, r5
 ; CHECK-P9-LE-NEXT:    beq cr0, .LBB1_2
 ; CHECK-P9-LE-NEXT:  .LBB1_1:
-; CHECK-P9-LE-NEXT:    stdu r5, -4096(r1)
-; CHECK-P9-LE-NEXT:    cmpd r1, r4
+; CHECK-P9-LE-NEXT:    stdu r4, -4096(r1)
+; CHECK-P9-LE-NEXT:    cmpd r1, r5
 ; CHECK-P9-LE-NEXT:    bne cr0, .LBB1_1
 ; CHECK-P9-LE-NEXT:  .LBB1_2:
 ; CHECK-P9-LE-NEXT:    extswsli r3, r3, 2
@@ -223,23 +223,23 @@ define i32 @bar(i32 %n) local_unnamed_addr #0 nounwind {
 ; CHECK-BE-NEXT:    std r31, -8(r1)
 ; CHECK-BE-NEXT:    stdu r1, -64(r1)
 ; CHECK-BE-NEXT:    rldic r4, r3, 2, 30
-; CHECK-BE-NEXT:    li r7, -4096
+; CHECK-BE-NEXT:    li r6, -4096
 ; CHECK-BE-NEXT:    addi r4, r4, 15
 ; CHECK-BE-NEXT:    rldicl r4, r4, 60, 4
 ; CHECK-BE-NEXT:    mr r31, r1
 ; CHECK-BE-NEXT:    rldicl r4, r4, 4, 29
-; CHECK-BE-NEXT:    addi r5, r31, 64
-; CHECK-BE-NEXT:    neg r6, r4
-; CHECK-BE-NEXT:    divd r8, r6, r7
-; CHECK-BE-NEXT:    add r4, r1, r6
-; CHECK-BE-NEXT:    mulld r7, r8, r7
-; CHECK-BE-NEXT:    sub r6, r6, r7
-; CHECK-BE-NEXT:    stdux r5, r1, r6
-; CHECK-BE-NEXT:    cmpd r1, r4
+; CHECK-BE-NEXT:    neg r5, r4
+; CHECK-BE-NEXT:    divd r7, r5, r6
+; CHECK-BE-NEXT:    addi r4, r31, 64
+; CHECK-BE-NEXT:    mulld r6, r7, r6
+; CHECK-BE-NEXT:    sub r6, r5, r6
+; CHECK-BE-NEXT:    add r5, r1, r5
+; CHECK-BE-NEXT:    stdux r4, r1, r6
+; CHECK-BE-NEXT:    cmpd r1, r5
 ; CHECK-BE-NEXT:    beq cr0, .LBB1_2
 ; CHECK-BE-NEXT:  .LBB1_1:
-; CHECK-BE-NEXT:    stdu r5, -4096(r1)
-; CHECK-BE-NEXT:    cmpd r1, r4
+; CHECK-BE-NEXT:    stdu r4, -4096(r1)
+; CHECK-BE-NEXT:    cmpd r1, r5
 ; CHECK-BE-NEXT:    bne cr0, .LBB1_1
 ; CHECK-BE-NEXT:  .LBB1_2:
 ; CHECK-BE-NEXT:    extsw r3, r3
@@ -259,21 +259,21 @@ define i32 @bar(i32 %n) local_unnamed_addr #0 nounwind {
 ; CHECK-32-NEXT:    slwi r3, r3, 2
 ; CHECK-32-NEXT:    addi r4, r3, 15
 ; CHECK-32-NEXT:    rlwinm r4, r4, 0, 0, 27
-; CHECK-32-NEXT:    neg r6, r4
-; CHECK-32-NEXT:    li r7, -4096
-; CHECK-32-NEXT:    divw r8, r6, r7
+; CHECK-32-NEXT:    neg r5, r4
+; CHECK-32-NEXT:    li r6, -4096
+; CHECK-32-NEXT:    divw r7, r5, r6
 ; CHECK-32-NEXT:    stw r31, 28(r1)
 ; CHECK-32-NEXT:    mr r31, r1
-; CHECK-32-NEXT:    addi r5, r31, 32
-; CHECK-32-NEXT:    add r4, r1, r6
-; CHECK-32-NEXT:    mullw r7, r8, r7
-; CHECK-32-NEXT:    sub r6, r6, r7
-; CHECK-32-NEXT:    stwux r5, r1, r6
-; CHECK-32-NEXT:    cmpw r1, r4
+; CHECK-32-NEXT:    addi r4, r31, 32
+; CHECK-32-NEXT:    mullw r6, r7, r6
+; CHECK-32-NEXT:    sub r6, r5, r6
+; CHECK-32-NEXT:    add r5, r1, r5
+; CHECK-32-NEXT:    stwux r4, r1, r6
+; CHECK-32-NEXT:    cmpw r1, r5
 ; CHECK-32-NEXT:    beq cr0, .LBB1_2
 ; CHECK-32-NEXT:  .LBB1_1:
-; CHECK-32-NEXT:    stwu r5, -4096(r1)
-; CHECK-32-NEXT:    cmpw r1, r4
+; CHECK-32-NEXT:    stwu r4, -4096(r1)
+; CHECK-32-NEXT:    cmpw r1, r5
 ; CHECK-32-NEXT:    bne cr0, .LBB1_1
 ; CHECK-32-NEXT:  .LBB1_2:
 ; CHECK-32-NEXT:    addi r4, r1, 16
@@ -300,24 +300,24 @@ define i32 @f(i32 %n) local_unnamed_addr #0 "stack-probe-size"="65536" nounwind
 ; CHECK-LE-NEXT:    std r31, -8(r1)
 ; CHECK-LE-NEXT:    stdu r1, -48(r1)
 ; CHECK-LE-NEXT:    rldic r3, r3, 2, 30
-; CHECK-LE-NEXT:    lis r5, -1
+; CHECK-LE-NEXT:    lis r4, -1
 ; CHECK-LE-NEXT:    mr r31, r1
 ; CHECK-LE-NEXT:    addi r3, r3, 15
-; CHECK-LE-NEXT:    ori r5, r5, 0
-; CHECK-LE-NEXT:    addi r4, r31, 48
+; CHECK-LE-NEXT:    ori r4, r4, 0
 ; CHECK-LE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-LE-NEXT:    rldicl r3, r3, 4, 29
-; CHECK-LE-NEXT:    neg r6, r3
-; CHECK-LE-NEXT:    divd r7, r6, r5
-; CHECK-LE-NEXT:    add r3, r1, r6
-; CHECK-LE-NEXT:    mulld r7, r7, r5
-; CHECK-LE-NEXT:    sub r6, r6, r7
-; CHECK-LE-NEXT:    stdux r4, r1, r6
-; CHECK-LE-NEXT:    cmpd r1, r3
+; CHECK-LE-NEXT:    neg r5, r3
+; CHECK-LE-NEXT:    addi r3, r31, 48
+; CHECK-LE-NEXT:    divd r6, r5, r4
+; CHECK-LE-NEXT:    mulld r6, r6, r4
+; CHECK-LE-NEXT:    sub r6, r5, r6
+; CHECK-LE-NEXT:    add r5, r1, r5
+; CHECK-LE-NEXT:    stdux r3, r1, r6
+; CHECK-LE-NEXT:    cmpd r1, r5
 ; CHECK-LE-NEXT:    beq cr0, .LBB2_2
 ; CHECK-LE-NEXT:  .LBB2_1:
-; CHECK-LE-NEXT:    stdux r4, r1, r5
-; CHECK-LE-NEXT:    cmpd r1, r3
+; CHECK-LE-NEXT:    stdux r3, r1, r4
+; CHECK-LE-NEXT:    cmpd r1, r5
 ; CHECK-LE-NEXT:    bne cr0, .LBB2_1
 ; CHECK-LE-NEXT:  .LBB2_2:
 ; CHECK-LE-NEXT:    li r4, 1
@@ -337,20 +337,20 @@ define i32 @f(i32 %n) local_unnamed_addr #0 "stack-probe-size"="65536" nounwind
 ; CHECK-P9-LE-NEXT:    lis r5, -1
 ; CHECK-P9-LE-NEXT:    ori r5, r5, 0
 ; CHECK-P9-LE-NEXT:    mr r31, r1
-; CHECK-P9-LE-NEXT:    addi r4, r31, 48
 ; CHECK-P9-LE-NEXT:    rldicl r3, r3, 60, 4
 ; CHECK-P9-LE-NEXT:    rldicl r3, r3, 4, 29
 ; CHECK-P9-LE-NEXT:    neg r6, r3
+; CHECK-P9-LE-NEXT:    addi r3, r31, 48
 ; CHECK-P9-LE-NEXT:    divd r7, r6, r5
-; CHECK-P9-LE-NEXT:    add r3, r1, r6
+; CHECK-P9-LE-NEXT:    add r4, r1, r6
 ; CHECK-P9-LE-NEXT:    mulld r7, r7, r5
 ; CHECK-P9-LE-NEXT:    sub r6, r6, r7
-; CHECK-P9-LE-NEXT:    stdux r4, r1, r6
-; CHECK-P9-LE-NEXT:    cmpd r1, r3
+; CHECK-P9-LE-NEXT:    stdux r3, r1, r6
+; CHECK-P9-LE-NEXT:    cmpd r1, r4
 ; CHECK-P9-LE-NEXT:    beq cr0, .LBB2_2
 ; CHECK-P9-LE-NEXT:  .LBB2_1:
-; CHECK-P9-LE-NEXT:    stdux r4, r1, r5
-; CHECK-P9-LE-NEXT:    cmpd r1, r3
+; CHECK-P9-LE-NEXT:    stdux r3, r1, r5
+; CHECK-P9-LE-NEXT:    cmpd r1, r4
 ; CHECK-P9-LE-NEXT:    bne cr0, .LBB2_1
 ; CHECK-P9-LE-NEXT:  .LBB2_2:
 ; CHECK-P9-LE-NEXT:    li r4, 1
@@ -366,24 +366,24 @@ define i32 @f(i32 %n) local_unnamed_addr #0 "stack-probe-size"="65536" nounwind
 ; CHECK-BE-NEXT:    std r31, -8(r1)
 ; CHECK-BE-NEXT:    stdu r1, -64(r1)
 ; CHECK-BE-NEXT:    rldic r3, r3, 2, 30
-; CHECK-BE-NEXT:    lis r5, -1
+; CHECK-BE-NEXT:    lis r4, -1
 ; CHECK-BE-NEXT:    addi r3, r3, 15
 ; CHECK-BE-NEXT:    rldicl r3, r3, 60, 4
-; CHECK-BE-NEXT:    ori r5, r5, 0
+; CHECK-BE-NEXT:    ori r4, r4, 0
 ; CHECK-BE-NEXT:    rldicl r3, r3, 4, 29
 ; CHECK-BE-NEXT:    mr r31, r1
-; CHECK-BE-NEXT:    neg r6, r3
-; CHECK-BE-NEXT:    divd r7, r6, r5
-; CHECK-BE-NEXT:    addi r4, r31, 64
-; CHECK-BE-NEXT:    mulld r7, r7, r5
-; CHECK-BE-NEXT:    add r3, r1, r6
-; CHECK-BE-NEXT:    sub r6, r6, r7
-; CHECK-BE-NEXT:    stdux r4, r1, r6
-; CHECK-BE-NEXT:    cmpd r1, r3
+; CHECK-BE-NEXT:    neg r5, r3
+; CHECK-BE-NEXT:    divd r6, r5, r4
+; CHECK-BE-NEXT:    addi r3, r31, 64
+; CHECK-BE-NEXT:    mulld r6, r6, r4
+; CHECK-BE-NEXT:    sub r6, r5, r6
+; CHECK-BE-NEXT:    add r5, r1, r5
+; CHECK-BE-NEXT:    stdux r3, r1, r6
+; CHECK-BE-NEXT:    cmpd r1, r5
 ; CHECK-BE-NEXT:    beq cr0, .LBB2_2
 ; CHECK-BE-NEXT:  .LBB2_1:
-; CHECK-BE-NEXT:    stdux r4, r1, r5
-; CHECK-BE-NEXT:    cmpd r1, r3
+; CHECK-BE-NEXT:    stdux r3, r1, r4
+; CHECK-BE-NEXT:    cmpd r1, r5
 ; CHECK-BE-NEXT:    bne cr0, .LBB2_1
 ; CHECK-BE-NEXT:  .LBB2_2:
 ; CHECK-BE-NEXT:    li r4, 1
@@ -400,22 +400,22 @@ define i32 @f(i32 %n) local_unnamed_addr #0 "stack-probe-size"="65536" nounwind
 ; CHECK-32-NEXT:    slwi r3, r3, 2
 ; CHECK-32-NEXT:    addi r3, r3, 15
 ; CHECK-32-NEXT:    rlwinm r3, r3, 0, 0, 27
-; CHECK-32-NEXT:    lis r5, -1
-; CHECK-32-NEXT:    neg r6, r3
-; CHECK-32-NEXT:    ori r5, r5, 0
-; CHECK-32-NEXT:    divw r7, r6, r5
+; CHECK-32-NEXT:    lis r4, -1
+; CHECK-32-NEXT:    neg r5, r3
+; CHECK-32-NEXT:    ori r4, r4, 0
+; CHECK-32-NEXT:    divw r6, r5, r4
 ; CHECK-32-NEXT:    stw r31, 28(r1)
 ; CHECK-32-NEXT:    mr r31, r1
-; CHECK-32-NEXT:    addi r4, r31, 32
-; CHECK-32-NEXT:    add r3, r1, r6
-; CHECK-32-NEXT:    mullw r7, r7, r5
-; CHECK-32-NEXT:    sub r6, r6, r7
-; CHECK-32-NEXT:    stwux r4, r1, r6
-; CHECK-32-NEXT:    cmpw r1, r3
+; CHECK-32-NEXT:    addi r3, r31, 32
+; CHECK-32-NEXT:    mullw r6, r6, r4
+; CHECK-32-NEXT:    sub r6, r5, r6
+; CHECK-32-NEXT:    add r5, r1, r5
+; CHECK-32-NEXT:    stwux r3, r1, r6
+; CHECK-32-NEXT:    cmpw r1, r5
 ; CHECK-32-NEXT:    beq cr0, .LBB2_2
 ; CHECK-32-NEXT:  .LBB2_1:
-; CHECK-32-NEXT:    stwux r4, r1, r5
-; CHECK-32-NEXT:    cmpw r1, r3
+; CHECK-32-NEXT:    stwux r3, r1, r4
+; CHECK-32-NEXT:    cmpw r1, r5
 ; CHECK-32-NEXT:    bne cr0, .LBB2_1
 ; CHECK-32-NEXT:  .LBB2_2:
 ; CHECK-32-NEXT:    li r4, 1


        


More information about the llvm-commits mailing list