[llvm] Promote the Pseudo Opcode of instructions that deduce the sign extension for extsw from 32 bits to 64 bits when eliminating the extsw instruction in PPCMIPeepholes optimization. (PR #85451)
    zhijian lin via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Thu Jun 13 12:14:39 PDT 2024
    
    
  
https://github.com/diggerlin updated https://github.com/llvm/llvm-project/pull/85451
>From 79aaf13394669f29982f02ba7ee0ea1b942c3370 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 19 Mar 2024 09:56:43 -0400
Subject: [PATCH 01/15] promote Pseudo Opcode from 32bit to 64bits after
 eliminating the extsw instruction in PPCMIPeepholes optimization
---
 llvm/lib/Target/PowerPC/P10InstrResources.td  |   5 +-
 llvm/lib/Target/PowerPC/P9InstrResources.td   |   4 +-
 llvm/lib/Target/PowerPC/PPC.td                |  12 +
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td      |   5 +
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      | 212 ++++++++++++++++++
 llvm/lib/Target/PowerPC/PPCInstrInfo.h        |   5 +
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |   3 +-
 llvm/lib/Target/PowerPC/PPCMIPeephole.cpp     |   1 +
 llvm/lib/Target/PowerPC/PPCScheduleP7.td      |   2 +-
 .../convert-rr-to-ri-instrs-out-of-range.mir  |   8 +-
 .../PowerPC/convert-rr-to-ri-instrs.mir       |  10 +-
 ...ole-replaceInstr-after-eliminate-extsw.mir |  17 +-
 llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll    |   2 +
 .../CodeGen/PowerPC/select-constant-xor.ll    |   4 +
 .../test/CodeGen/PowerPC/sext_elimination.mir |  10 +-
 .../PowerPC/stack-restore-with-setjmp.ll      |   4 +-
 .../CodeGen/PowerPC/store-forward-be64.ll     |   1 +
 17 files changed, 281 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index 32cebb65cb569..2e0c10344fd07 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -833,7 +833,8 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
 def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
       (instrs
     SRAD_rec,
-    SRAW_rec
+    SRAW_rec,
+    SRAW8_rec
 )>;
 
 // 2-way crack instructions
@@ -1008,7 +1009,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
     SLD,
     SLW, SLW8,
     SRAD,
-    SRAW,
+    SRAW, SRAW8,
     SRD,
     SRW, SRW8,
     SUBF, SUBF8,
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 395999c7242af..263b238a3674f 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -189,7 +189,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
     (instregex "F(N)?ABS(D|S)$"),
     (instregex "FNEG(D|S)$"),
     (instregex "FCPSGN(D|S)$"),
-    (instregex "SRAW(I)?$"),
+    (instregex "SRAW(I|8)?$"),
     (instregex "ISEL(8)?$"),
     RLDIMI,
     XSIEXPDP,
@@ -1091,7 +1091,7 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
     (instregex "RLD(I)?C(R|L)_rec$"),
     (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
     (instregex "SLW(8)?_rec$"),
-    (instregex "SRAW(I)?_rec$"),
+    (instregex "SRAW(8|I)?_rec$"),
     (instregex "SRW(8)?_rec$"),
     RLDICL_32_rec,
     RLDIMI_rec
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index b962ed28d7200..40c3d30649898 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -534,6 +534,18 @@ def getAltVSXFMAOpcode : InstrMapping {
   let ValueCols = [["1"]];
 }
 
+def get64BitInstrFromSignedExt32BitInstr : InstrMapping {
+  let FilterClass = "SExt32To64";
+  // Instructions with the same Inst value form a row.
+  let RowFields = ["Inst"];
+  // Instructions with the same Interpretation64Bit value form a column.
+  let ColFields = ["Interpretation64Bit"];
+  // The key column holds the instructions with Interpretation64Bit = 0.
+  let KeyCol = ["0"];
+  // The value column holds the instructions with Interpretation64Bit = 1.
+  let ValueCols = [["1"]];
+}
+
 //===----------------------------------------------------------------------===//
 // Register File Description
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 9af8ada783761..8295e9f827bcd 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -932,6 +932,11 @@ defm SLW8  : XForm_6r<31,  24, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
                       "slw", "$RA, $RST, $RB", IIC_IntGeneral, []>, ZExt32To64;
 defm SRW8  : XForm_6r<31, 536, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
                       "srw", "$RA, $RST, $RB", IIC_IntGeneral, []>, ZExt32To64;
+
+defm SRAW8 : XForm_6rc<31, 792, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+                      "sraw", "$RA, $RST, $RB", IIC_IntShift,
+                      [(set i64:$RA, (PPCsra i64:$RST, i64:$RB))]>, SExt32To64;
+
 } // Interpretation64Bit
 
 // For fast-isel:
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9e56de732c587..e116d19bdf95a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5234,6 +5234,218 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
 // We limit the max depth to track incoming values of PHIs or binary ops
 // (e.g. AND) to avoid excessive cost.
 const unsigned MAX_BINOP_DEPTH = 1;
+// Rewrite the def of Reg (and, recursively, its inputs) with 64-bit opcodes.
+void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
+                                                  MachineRegisterInfo *MRI,
+                                                  unsigned BinOpDepth,
+                                                  LiveVariables *LV) const {
+  MachineInstr *MI = MRI->getVRegDef(Reg);
+  if (!MI)
+    return;
+
+  unsigned Opcode = MI->getOpcode();
+  bool IsReplaceInstr = false;
+  int NewOpcode = -1;
+
+  auto SetNewOpcode = [&](int NewOpc) {
+    if (!IsReplaceInstr) {
+      NewOpcode = NewOpc;
+      IsReplaceInstr = true;
+    }
+  };
+
+  switch (Opcode) {
+  case PPC::OR:
+    SetNewOpcode(PPC::OR8);
+    [[fallthrough]];
+  case PPC::ISEL:
+    SetNewOpcode(PPC::ISEL8);
+    [[fallthrough]];
+  case PPC::OR8:
+  case PPC::PHI:
+    if (BinOpDepth < MAX_BINOP_DEPTH) {
+      unsigned OperandEnd = 3, OperandStride = 1;
+      if (Opcode == PPC::PHI) {
+        OperandEnd = MI->getNumOperands();
+        OperandStride = 2;
+      }
+
+      for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
+        assert(MI->getOperand(I).isReg() && "Operand must be register");
+        Register SrcReg = MI->getOperand(I).getReg();
+        replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth + 1, LV);
+      }
+
+      if (!IsReplaceInstr)
+        return;
+    }
+    break;
+  case PPC::COPY: {
+    Register SrcReg = MI->getOperand(1).getReg();
+    const MachineFunction *MF = MI->getMF();
+    if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
+      replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
+      return;
+    }
+    // From here on the subtarget is known to use the SVR4 ABI.
+    if (MI->getParent()->getBasicBlock() == &MF->getFunction().getEntryBlock())
+      return;
+
+    if (SrcReg != PPC::X3) {
+      replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
+      return;
+    }
+  }
+    return;
+  case PPC::ORI:
+    SetNewOpcode(PPC::ORI8);
+    [[fallthrough]];
+  case PPC::XORI:
+    SetNewOpcode(PPC::XORI8);
+    [[fallthrough]];
+  case PPC::ORIS:
+    SetNewOpcode(PPC::ORIS8);
+    [[fallthrough]];
+  case PPC::XORIS:
+    SetNewOpcode(PPC::XORIS8);
+    [[fallthrough]];
+  case PPC::ORI8:
+  case PPC::XORI8:
+  case PPC::ORIS8:
+  case PPC::XORIS8: {
+    Register SrcReg = MI->getOperand(1).getReg();
+    replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
+
+    if (!IsReplaceInstr)
+      return;
+    break;
+  }
+  case PPC::AND:
+    SetNewOpcode(PPC::AND8);
+    [[fallthrough]];
+  case PPC::AND8: {
+    if (BinOpDepth < MAX_BINOP_DEPTH) {
+      Register SrcReg1 = MI->getOperand(1).getReg();
+      replaceInstrAfterElimExt32To64(SrcReg1, MRI, BinOpDepth, LV);
+      Register SrcReg2 = MI->getOperand(2).getReg();
+      replaceInstrAfterElimExt32To64(SrcReg2, MRI, BinOpDepth, LV);
+      if (!IsReplaceInstr)
+        return;
+    }
+    break;
+  }
+  case PPC::RLWINM:
+    SetNewOpcode(PPC::RLWINM8);
+    break;
+  case PPC::RLWINM_rec:
+    SetNewOpcode(PPC::RLWINM8_rec);
+    break;
+  case PPC::RLWNM:
+    SetNewOpcode(PPC ::RLWNM8);
+    break;
+  case PPC::RLWNM_rec:
+    SetNewOpcode(PPC::RLWNM8_rec);
+    break;
+  case PPC::ANDC_rec:
+    SetNewOpcode(PPC::ANDC8_rec);
+    break;
+  case PPC::ANDIS_rec:
+    SetNewOpcode(PPC::ANDIS8_rec);
+    break;
+  default:
+    break;
+  }
+
+  const PPCInstrInfo *TII =
+      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
+  if ((definedBySignExtendingOp(Reg, MRI) && !TII->isZExt32To64(Opcode) &&
+       !isOpZeroOfSubwordPreincLoad(Opcode)) ||
+      IsReplaceInstr) {
+
+    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+
+    if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
+      return;
+
+    if (!IsReplaceInstr)
+      NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);
+
+    assert(NewOpcode != -1 &&
+           "Must have a 64-bit opcode to map the 32-bit opcode!");
+
+    const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+    const MCInstrDesc &MCID = TII->get(NewOpcode);
+
+    Register SrcReg = MI->getOperand(0).getReg();
+    const TargetRegisterClass *NewRC =
+        TRI->getRegClass(MCID.operands()[0].RegClass);
+    const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
+
+    if (NewRC == SrcRC)
+      return;
+
+    DebugLoc DL = MI->getDebugLoc();
+    auto MBB = MI->getParent();
+
+    // The opcode is being promoted from its 32-bit to its 64-bit form, so any
+    // virtual-register operand in PPC::GPRCRegClass or
+    // PPC::GPRC_and_GPRC_NOR0RegClass must be widened (IMPLICIT_DEF +
+    // INSERT_SUBREG into sub_32) to the register class that the 64-bit opcode
+    // expects for that operand.
+    DenseMap<unsigned, Register> PromoteRegs;
+    DenseMap<unsigned, Register> ReCalRegs;
+    for (unsigned i = 1; i < MI->getNumOperands(); i++) {
+      MachineOperand &Operand = MI->getOperand(i);
+      if (Operand.isReg()) {
+        Register OperandReg = Operand.getReg();
+        if (!OperandReg.isVirtual())
+          continue;
+
+        const TargetRegisterClass *RC =
+            TRI->getRegClass(MCID.operands()[i].RegClass);
+        const TargetRegisterClass *OrgRC = MRI->getRegClass(OperandReg);
+        if (RC != MRI->getRegClass(OperandReg) &&
+            (OrgRC == &PPC::GPRCRegClass ||
+             OrgRC == &PPC::GPRC_and_GPRC_NOR0RegClass)) {
+          Register TmpReg = MRI->createVirtualRegister(RC);
+          Register DstTmpReg = MRI->createVirtualRegister(RC);
+          BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
+          BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
+              .addReg(TmpReg)
+              .addReg(OperandReg)
+              .addImm(PPC::sub_32);
+          PromoteRegs[i] = DstTmpReg;
+          ReCalRegs[i] = DstTmpReg;
+        } else {
+          ReCalRegs[i] = OperandReg;
+        }
+      }
+    }
+
+    Register NewReg = MRI->createVirtualRegister(NewRC);
+
+    BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewReg);
+    MachineBasicBlock::instr_iterator Iter(MI);
+    --Iter;
+    for (unsigned i = 1; i < MI->getNumOperands(); i++)
+      if (PromoteRegs.find(i) != PromoteRegs.end())
+        MachineInstrBuilder(*Iter->getMF(), Iter)
+            .addReg(PromoteRegs[i], RegState::Kill);
+      else
+        Iter->addOperand(MI->getOperand(i));
+
+    for (auto Iter = ReCalRegs.begin(); Iter != ReCalRegs.end(); Iter++)
+      LV->recomputeForSingleDefVirtReg(Iter->second);
+    MI->eraseFromParent();
+
+    BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
+        .addReg(NewReg, RegState::Kill, PPC::sub_32);
+    LV->recomputeForSingleDefVirtReg(NewReg);
+    return;
+  }
+  return;
+}
+
 // The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
 // does not count all of the recursions. The parameter BinOpDepth is incremented
 // only when isSignOrZeroExtended calls itself more than once. This is done to
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 1e2687f92c61e..032904437ab07 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -17,6 +17,7 @@
 #include "PPC.h"
 #include "PPCRegisterInfo.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 
 #define GET_INSTRINFO_HEADER
@@ -624,6 +625,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                       const MachineRegisterInfo *MRI) const {
     return isSignOrZeroExtended(Reg, 0, MRI).second;
   }
+  void replaceInstrAfterElimExt32To64(const Register &Reg,
+                                      MachineRegisterInfo *MRI,
+                                      unsigned BinOpDepth,
+                                      LiveVariables *LV) const;
 
   bool convertToImmediateForm(MachineInstr &MI,
                               SmallSet<Register, 4> &RegsToUpdate,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 7929a781dbda8..4b3e422c55126 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2423,8 +2423,7 @@ let PPC970_Unit = 1 in {  // FXU Operations.
 let hasSideEffects = 0 in {
 defm SRAWI : XForm_10rc<31, 824, (outs gprc:$RA), (ins gprc:$RST, u5imm:$RB),
                         "srawi", "$RA, $RST, $RB", IIC_IntShift,
-                        [(set i32:$RA, (sra i32:$RST, (i32 imm:$RB)))]>,
-                        SExt32To64;
+                        [(set i32:$RA, (sra i32:$RST, (i32 imm:$RB)))]>;
 defm CNTLZW : XForm_11r<31,  26, (outs gprc:$RA), (ins gprc:$RST),
                         "cntlzw", "$RA, $RST", IIC_IntGeneral,
                         [(set i32:$RA, (ctlz i32:$RST))]>, ZExt32To64;
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index c6db8a7bbeb85..50d57d5148cf0 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1051,6 +1051,7 @@ bool PPCMIPeephole::simplifyCode() {
                    TII->isSignExtended(NarrowReg, MRI)) {
           // We can eliminate EXTSW if the input is known to be already
           // sign-extended.
+          TII->replaceInstrAfterElimExt32To64(NarrowReg, MRI, 0, LV);
           LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
           Register TmpReg =
               MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/llvm/lib/Target/PowerPC/PPCScheduleP7.td
index 93399e5ddbca8..61fdc89dda864 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -216,7 +216,7 @@ let SchedModel = P7Model in {
     RLWNM, RLWNM8, RLWNM_rec, RLDIMI, RLDIMI_rec,
     RLDICL_32, RLDICL_32_64, RLDICL_32_rec, RLDICR_32, RLWINM8_rec, RLWNM8_rec,
     SLD, SLD_rec, SLW, SLW8, SLW_rec, SLW8_rec, SRD, SRD_rec, SRW, SRW8, SRW_rec,
-    SRW8_rec, SRADI, SRADI_rec, SRAWI, SRAWI_rec, SRAD, SRAD_rec, SRAW, SRAW_rec,
+    SRW8_rec, SRADI, SRADI_rec, SRAWI, SRAWI_rec, SRAD, SRAD_rec, SRAW, SRAW_rec, SRAW8, SRAW8_rec,
     SRADI_32, SUBFE, SUBFE8, SUBFE8O_rec, SUBFEO_rec
   )>;
 
diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
index dfbf412a93921..fb1739ae3e983 100644
--- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
+++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
@@ -256,7 +256,7 @@ body:             |
     %3 = IMPLICIT_DEF
     %2 = LI 170
     %4 = RLWNM killed %1, %2, 20, 27
-    ; CHECK: RLWINM killed %1, 10, 20, 27
+    ; CHECK: RLWINM8 killed %6, 10, 20, 27
     ; CHECK-LATE: rlwinm 3, 3, 10, 20, 27
     $x3 = EXTSW_32_64 %4
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -604,7 +604,7 @@ body:             |
     %2 = LI 48
     %5 = COPY %0.sub_32
     %8 = SRW killed %5, killed %2
-    ; CHECK: LI 0
+    ; CHECK: LI8 0
     ; CHECK-LATE: li 3, 0
     $x3 = EXTSW_32_64 %8
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -722,7 +722,7 @@ body:             |
     %3 = COPY %0.sub_32
     %4 = SRAW killed %3, killed %2, implicit-def dead $carry
     ; CHECK: LI 48
-    ; CHECK: SRAW killed %3, killed %2, implicit-def dead $carry
+    ; CHECK: SRAW8 killed %7, killed %9, implicit-def $carry, implicit-def dead $carry 
     ; CHECK-LATE: sraw 3, 3, 4
     %5 = EXTSW_32_64 killed %4
     $x3 = COPY %5
@@ -779,7 +779,7 @@ body:             |
     %2 = LI 80
     %3 = COPY %0.sub_32
     %4 = SRAW_rec killed %3, %2, implicit-def dead $carry, implicit-def $cr0
-    ; CHECK: SRAW_rec killed %3, %2, implicit-def dead $carry, implicit-def $cr0
+    ; CHECK: killed %10, killed %12, implicit-def $carry, implicit-def $cr0, implicit-def dead $carry, implicit-def $cr0
     ; CHECK-LATE: sraw. 3, 3, 4
     %5 = COPY killed $cr0
     %6 = ISEL %2, %4, %5.sub_eq
diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
index 761316ed7726d..a1e4cd38d56ef 100644
--- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
+++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
@@ -1348,7 +1348,7 @@ body:             |
     %1 = LI 77
     %2 = ADDI killed %1, 44
     %3 = EXTSW_32_64 killed %2
-    ; CHECK: LI 121
+    ; CHECK: LI8 121
     ; CHECK-LATE: li 3, 121
     $x3 = COPY %3
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -3573,7 +3573,7 @@ body:             |
 
     %0 = LI 777
     %1 = ORI %0, 88
-    ; CHECK: LI 857
+    ; CHECK: LI8 857
     ; CHECK-LATE: li 3, 857
     $x3 = EXTSW_32_64 %1
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -4145,7 +4145,7 @@ body:             |
     %3 = IMPLICIT_DEF
     %2 = LI 17
     %4 = RLWINM killed %2, 4, 20, 27
-    ; CHECK: LI 272
+    ; CHECK: LI8 272
     ; CHECK-LATE: li 3, 272
     $x3 = EXTSW_32_64 %4
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -4873,7 +4873,7 @@ body:             |
     %2 = LI 8
     %5 = COPY %0.sub_32
     %8 = SRW killed %5, killed %2
-    ; CHECK: RLWINM killed %5, 24, 8, 31
+    ; CHECK: RLWINM8 killed %10, 24, 8, 31
     ; CHECK-LATE: srwi 3, 3, 8
     $x3 = EXTSW_32_64 %8
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -6456,7 +6456,7 @@ body:             |
 
     %0 = LI 871
     %1 = XORI %0, 17
-    ; CHECK: LI 886
+    ; CHECK: LI8 886
     ; CHECK-LATE: li 3, 886
     $x3 = EXTSW_32_64 %1
     BLR8 implicit $lr8, implicit $rm, implicit $x3
diff --git a/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir b/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir
index 71b1ad5368104..8e3c0862d3cba 100644
--- a/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir
+++ b/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir
@@ -496,7 +496,8 @@ body:             |
     %6:g8rc_and_g8rc_nox0 = EXTSB8 killed %84
     %7:gprc = LHZ 6, %64 :: (dereferenceable load (s16) from `ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 3)`, !tbaa !3)
     %86:gprc_and_gprc_nor0 = LHA 4, %64 :: (dereferenceable load (s16) from `ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 2)`)
-    ; CHECK:  [[VIRREG1:%[0-9]+]]:gprc_and_gprc_nor0 = LHA 6, %64 :: (dereferenceable load (s16) from `ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 3)`, !tbaa !3) 
+    ; CHECK:       [[VIRREG162:%[0-9]+]]:g8rc = LHA8 6, %64
+    ; CHECK-NEXT:  [[VIRREG150:%[0-9]+]]:gprc_and_gprc_nor0 = COPY killed [[VIRREG162]].sub_32
     %87:crrc = CMPW %80, %86
     %88:gprc = ISEL %80, %86, %87.sub_lt
     %89:crrc = CMPLWI killed %88, 0
@@ -544,13 +545,19 @@ body:             |
     %150:gprc_and_gprc_nor0 = EXTSH %7
     %151:gprc_and_gprc_nor0 = EXTSH %0
 
-    ; CHECK:      [[VIRREG2:%[0-9]+]]:gprc_and_gprc_nor0 = EXTSH killed %0 
+    ; CHECK:      [[VIRREG159:%[0-9]+]]:g8rc = IMPLICIT_DEF
+    ; CHECK-NEXT: [[VIRREG160:%[0-9]+]]:g8rc = INSERT_SUBREG [[VIRREG159]], %0, %subreg.sub_32
+    ; CHECK-NEXT: [[VIRREG161:%[0-9]+]]:g8rc = EXTSH8 killed [[VIRREG160]]
+    ; CHECK-NEXT: %151:gprc_and_gprc_nor0 = COPY killed [[VIRREG161]].sub_32
+
     %152:crrc = CMPW %151, %150
     %153:gprc = ISEL %151, %150, %152.sub_lt
     %154:g8rc = EXTSW_32_64 killed %153
-    ; CHECK-NEXT: [[VIRREG3:%[0-9]+]]:crrc = CMPW [[VIRREG2]], [[VIRREG1]]
-    ; CHECK-NEXT: %153:gprc = ISEL killed [[VIRREG2]], killed [[VIRREG1]], killed [[VIRREG3]].sub_lt
-    ; CHECK-NOT: EXTSW_32_64
+    ; CHECK:      [[VIRREG165:%[0-9]+]]:g8rc = IMPLICIT_DEF
+    ; CHECK-NEXT: [[VIRREG166:%[0-9]+]]:g8rc = INSERT_SUBREG [[VIRREG165]], [[VIRREG150]], %subreg.sub_32
+    ; CHECK-NEXT: [[VIRREG167:%[0-9]+]]:g8rc = ISEL8 killed %164, killed [[VIRREG166]], %152.sub_lt
+    ; CHECK-NEXT: %{{[0-9]+}}:gprc = COPY killed [[VIRREG167]].sub_32
+    ; CHECK-NOT:  EXTSW_32_64
     %155:g8rc_and_g8rc_nox0 = LDtoc @computedResultUll, $x2 :: (load (s64) from got)
     STD %154, 0, killed %155 :: (store (s64) into @computedResultUll, !tbaa !7)
     ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
index a2a5c6c5eafb7..13cdcd9079fc7 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
@@ -937,6 +937,8 @@ define i64 @setbsc3(i4 %a, i4 %b) {
 ; CHECK-PWR8-NEXT:    slwi r3, r3, 28
 ; CHECK-PWR8-NEXT:    srawi r4, r4, 28
 ; CHECK-PWR8-NEXT:    srawi r3, r3, 28
+; CHECK-PWR8-NEXT:    extsw r4, r4
+; CHECK-PWR8-NEXT:    extsw r3, r3
 ; CHECK-PWR8-NEXT:    cmpw r3, r4
 ; CHECK-PWR8-NEXT:    sub r5, r4, r3
 ; CHECK-PWR8-NEXT:    li r3, -1
diff --git a/llvm/test/CodeGen/PowerPC/select-constant-xor.ll b/llvm/test/CodeGen/PowerPC/select-constant-xor.ll
index b40a21b82e836..52a763262f59b 100644
--- a/llvm/test/CodeGen/PowerPC/select-constant-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/select-constant-xor.ll
@@ -42,6 +42,7 @@ define i64 @selecti32i64(i32 %a) {
 ; CHECK-LABEL: selecti32i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    srawi 3, 3, 31
+; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    xori 3, 3, 65535
 ; CHECK-NEXT:    xoris 3, 3, 32767
 ; CHECK-NEXT:    blr
@@ -68,6 +69,7 @@ define i32 @selecti32i32(i32 %a) {
 ; CHECK-LABEL: selecti32i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    srawi 3, 3, 31
+; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    xori 3, 3, 84
 ; CHECK-NEXT:    blr
   %c = icmp sgt i32 %a, -1
@@ -79,6 +81,7 @@ define i8 @selecti32i8(i32 %a) {
 ; CHECK-LABEL: selecti32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    srawi 3, 3, 31
+; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    xori 3, 3, 84
 ; CHECK-NEXT:    blr
   %c = icmp sgt i32 %a, -1
@@ -91,6 +94,7 @@ define i32 @selecti8i32(i8 %a) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    extsb 3, 3
 ; CHECK-NEXT:    srawi 3, 3, 7
+; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    xori 3, 3, 84
 ; CHECK-NEXT:    blr
   %c = icmp sgt i8 %a, -1
diff --git a/llvm/test/CodeGen/PowerPC/sext_elimination.mir b/llvm/test/CodeGen/PowerPC/sext_elimination.mir
index e920848a4137c..bf6b9005fcf7f 100644
--- a/llvm/test/CodeGen/PowerPC/sext_elimination.mir
+++ b/llvm/test/CodeGen/PowerPC/sext_elimination.mir
@@ -41,8 +41,14 @@ body:             |
     ; CHECK: %4:g8rc = EXTSW_32_64 killed %3
     ; CHECK: %5:g8rc = INSERT_SUBREG %15, %1, %subreg.sub_32
     ; CHECK: %7:g8rc = EXTSW_32_64 killed %6
-    ; CHECK: %9:g8rc = INSERT_SUBREG %16, %8, %subreg.sub_32
-    ; CHECK: %11:g8rc = INSERT_SUBREG %17, %10, %subreg.sub_32
+    ; CHECK: %17:g8rc = INSERT_SUBREG %16, %1, %subreg.sub_32
+    ; CHECK-NEXT: %18:g8rc = ORIS8 killed %17, 32767
+    ; CHECK-NEXT: %8:gprc = COPY killed %18.sub_32
+    ; CHECK: %9:g8rc = INSERT_SUBREG %19, %8, %subreg.sub_32
+    ; CHECK: %21:g8rc = INSERT_SUBREG %20, %1, %subreg.sub_32
+    ; CHECK-NEXT: %22:g8rc = ORI8 killed %21, 32768
+    ; CHECK-NEXT: %10:gprc = COPY killed %22.sub_32
+    ; CHECK: %11:g8rc = INSERT_SUBREG %23, %10, %subreg.sub_32
     ; CHECK: %14:g8rc = COPY killed %13
 
     %0:g8rc_nox0 = COPY $x3
diff --git a/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
index 8748767501bd0..bc12276060a8b 100644
--- a/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
@@ -19,9 +19,10 @@ define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar
 ; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $x3
 ; CHECK-NEXT:    cmpwi 2, 3, 2
 ; CHECK-NEXT:    li 4, 0
+; CHECK-NEXT:    # kill: def $r4 killed $r4 killed $x4
+; CHECK-NEXT:    mr 3, 4
 ; CHECK-NEXT:    std 0, 800(1)
 ; CHECK-NEXT:    mr 31, 1
-; CHECK-NEXT:    mr 3, 4
 ; CHECK-NEXT:    blt 2, .LBB0_3
 ; CHECK-NEXT:  # %bb.1: # %if.end
 ; CHECK-NEXT:    addi 3, 31, 112
@@ -65,6 +66,7 @@ define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar
 ; BE-NEXT:    stdu 1, -800(1)
 ; BE-NEXT:    li 4, 0
 ; BE-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; BE-NEXT:    # kill: def $r4 killed $r4 killed $x4
 ; BE-NEXT:    cmpwi 2, 3, 2
 ; BE-NEXT:    mr 3, 4
 ; BE-NEXT:    std 0, 816(1)
diff --git a/llvm/test/CodeGen/PowerPC/store-forward-be64.ll b/llvm/test/CodeGen/PowerPC/store-forward-be64.ll
index 32e67c7ce127a..720fec96eddc2 100644
--- a/llvm/test/CodeGen/PowerPC/store-forward-be64.ll
+++ b/llvm/test/CodeGen/PowerPC/store-forward-be64.ll
@@ -51,6 +51,7 @@ define signext i32 @stc1(ptr noundef byval(%struct.SST) align 8 %s) {
 ; CHECK-NEXT:    std 4, 48(1)
 ; CHECK-NEXT:    extsh 3, 3
 ; CHECK-NEXT:    srawi 3, 3, 8
+; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    blr
 entry:
   %0 = load i16, ptr %s, align 8
>From d4b4abcddbe56cd2e9b96f5ad4233d185c09b121 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 30 Apr 2024 14:22:00 -0400
Subject: [PATCH 02/15] add srawi8
---
 llvm/lib/Target/PowerPC/P10InstrResources.td | 5 +++--
 llvm/lib/Target/PowerPC/P9InstrResources.td  | 6 ++++--
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td     | 5 +++++
 llvm/lib/Target/PowerPC/PPCInstrInfo.td      | 3 ++-
 llvm/lib/Target/PowerPC/PPCScheduleP7.td     | 2 +-
 5 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index 2e0c10344fd07..92af04a4cff2d 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -825,7 +825,8 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
 def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
       (instrs
     SRADI_rec,
-    SRAWI_rec
+    SRAWI_rec,
+    SRAWI8_rec
 )>;
 
 // Single crack instructions
@@ -927,7 +928,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
     SETNBC, SETNBC8,
     SETNBCR, SETNBCR8,
     SRADI, SRADI_32,
-    SRAWI,
+    SRAWI, SRAWI8,
     SUBFIC, SUBFIC8,
     SUBFME, SUBFME8,
     SUBFME8O, SUBFMEO,
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 263b238a3674f..801ae83cd07c4 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -189,7 +189,8 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
     (instregex "F(N)?ABS(D|S)$"),
     (instregex "FNEG(D|S)$"),
     (instregex "FCPSGN(D|S)$"),
-    (instregex "SRAW(I|8)?$"),
+    (instregex "SRAW(8)?$"),
+    (instregex "SRAWI(8)?$"),
     (instregex "ISEL(8)?$"),
     RLDIMI,
     XSIEXPDP,
@@ -1091,7 +1092,8 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
     (instregex "RLD(I)?C(R|L)_rec$"),
     (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
     (instregex "SLW(8)?_rec$"),
-    (instregex "SRAW(8|I)?_rec$"),
+    (instregex "SRAW(8)?_rec$"),
+    (instregex "SRAWI(8)?_rec$"),
     (instregex "SRW(8)?_rec$"),
     RLDICL_32_rec,
     RLDIMI_rec
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 8295e9f827bcd..bc8e36dca9099 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -937,6 +937,11 @@ defm SRAW8 : XForm_6rc<31, 792, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
                       "sraw", "$RA, $RST, $RB", IIC_IntShift,
                       [(set i64:$RA, (PPCsra i64:$RST, i64:$RB))]>, SExt32To64;
 
+defm SRAWI8 : XForm_10rc<31, 824, (outs g8rc:$RA), (ins g8rc:$RST, u5imm:$RB),
+                        "srawi", "$RA, $RST, $RB", IIC_IntShift,
+                        [(set i64:$RA, (sra i64:$RST, (i64 imm:$RB)))]>, isPPC64,
+                        SExt32To64;
+
 } // Interpretation64Bit
 
 // For fast-isel:
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 4b3e422c55126..7929a781dbda8 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2423,7 +2423,8 @@ let PPC970_Unit = 1 in {  // FXU Operations.
 let hasSideEffects = 0 in {
 defm SRAWI : XForm_10rc<31, 824, (outs gprc:$RA), (ins gprc:$RST, u5imm:$RB),
                         "srawi", "$RA, $RST, $RB", IIC_IntShift,
-                        [(set i32:$RA, (sra i32:$RST, (i32 imm:$RB)))]>;
+                        [(set i32:$RA, (sra i32:$RST, (i32 imm:$RB)))]>,
+                        SExt32To64;
 defm CNTLZW : XForm_11r<31,  26, (outs gprc:$RA), (ins gprc:$RST),
                         "cntlzw", "$RA, $RST", IIC_IntGeneral,
                         [(set i32:$RA, (ctlz i32:$RST))]>, ZExt32To64;
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/llvm/lib/Target/PowerPC/PPCScheduleP7.td
index 61fdc89dda864..bf7f2f7a9c999 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP7.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -216,7 +216,7 @@ let SchedModel = P7Model in {
     RLWNM, RLWNM8, RLWNM_rec, RLDIMI, RLDIMI_rec,
     RLDICL_32, RLDICL_32_64, RLDICL_32_rec, RLDICR_32, RLWINM8_rec, RLWNM8_rec,
     SLD, SLD_rec, SLW, SLW8, SLW_rec, SLW8_rec, SRD, SRD_rec, SRW, SRW8, SRW_rec,
-    SRW8_rec, SRADI, SRADI_rec, SRAWI, SRAWI_rec, SRAD, SRAD_rec, SRAW, SRAW_rec, SRAW8, SRAW8_rec,
+    SRW8_rec, SRADI, SRADI_rec, SRAWI, SRAWI_rec, SRAWI8, SRAWI8_rec, SRAD, SRAD_rec, SRAW, SRAW_rec, SRAW8, SRAW8_rec,
     SRADI_32, SUBFE, SUBFE8, SUBFE8O_rec, SUBFEO_rec
   )>;
 
>From affb9bffea34193e7862f3e354e4050c94a88a14 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 1 May 2024 12:06:08 -0400
Subject: [PATCH 03/15] add SRAWI8 record
---
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td                   | 4 +---
 .../PowerPC/convert-rr-to-ri-instrs-out-of-range.mir       | 2 +-
 llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir      | 7 +++++--
 llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll                 | 2 --
 llvm/test/CodeGen/PowerPC/select-constant-xor.ll           | 4 ----
 llvm/test/CodeGen/PowerPC/store-forward-be64.ll            | 1 -
 6 files changed, 7 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index bc8e36dca9099..ede20e3876b04 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -938,9 +938,7 @@ defm SRAW8 : XForm_6rc<31, 792, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
                       [(set i64:$RA, (PPCsra i64:$RST, i64:$RB))]>, SExt32To64;
 
 defm SRAWI8 : XForm_10rc<31, 824, (outs g8rc:$RA), (ins g8rc:$RST, u5imm:$RB),
-                        "srawi", "$RA, $RST, $RB", IIC_IntShift,
-                        [(set i64:$RA, (sra i64:$RST, (i64 imm:$RB)))]>, isPPC64,
-                        SExt32To64;
+            "srawi", "$RA, $RST, $RB", IIC_IntShift, []>, SExt32To64;
 
 } // Interpretation64Bit
 
diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
index fb1739ae3e983..cd48034d61a12 100644
--- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
+++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
@@ -779,7 +779,7 @@ body:             |
     %2 = LI 80
     %3 = COPY %0.sub_32
     %4 = SRAW_rec killed %3, %2, implicit-def dead $carry, implicit-def $cr0
-    ; CHECK: killed %10, killed %12, implicit-def $carry, implicit-def $cr0, implicit-def dead $carry, implicit-def $cr0
+    ; CHECK: SRAW8_rec killed %10, killed %12, implicit-def $carry, implicit-def $cr0, implicit-def dead $carry, implicit-def $cr0
     ; CHECK-LATE: sraw. 3, 3, 4
     %5 = COPY killed $cr0
     %6 = ISEL %2, %4, %5.sub_eq
diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
index a1e4cd38d56ef..5ffdefe1960fc 100644
--- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
+++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
@@ -4990,7 +4990,10 @@ body:             |
     %2 = LI 15
     %3 = COPY %0.sub_32
     %4 = SRAW killed %3, killed %2, implicit-def dead $carry
-    ; CHECK: SRAWI killed %3, 15, implicit-def dead $carry
+    ; CHECK:      %6:g8rc = IMPLICIT_DEF
+    ; CHECK-NEXT: %7:g8rc = INSERT_SUBREG %6, killed %3, %subreg.sub_32
+    ; CHECK-NEXT: %8:g8rc = SRAWI8 killed %7, 15, implicit-def $carry, implicit-def dead $carry
+    ; CHECK-NEXT: %4:gprc = COPY killed %8.sub_32
     ; CHECK-LATE: srawi 3, 3, 15
     %5 = EXTSW_32_64 killed %4
     $x3 = COPY %5
@@ -5047,7 +5050,7 @@ body:             |
     %2 = LI 8
     %3 = COPY %0.sub_32
     %4 = SRAW_rec killed %3, %2, implicit-def dead $carry, implicit-def $cr0
-    ; CHECK: SRAWI_rec killed %3, 8, implicit-def dead $carry, implicit-def $cr0
+    ; CHECK:  %11:g8rc = SRAWI8_rec killed %10, 8, implicit-def $carry, implicit-def $cr0, implicit-def dead $carry, implicit-def $cr0
     ; CHECK-LATE: srawi. 3, 3, 8
     %5 = COPY killed $cr0
     %6 = ISEL %2, %4, %5.sub_eq
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
index 13cdcd9079fc7..a2a5c6c5eafb7 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
@@ -937,8 +937,6 @@ define i64 @setbsc3(i4 %a, i4 %b) {
 ; CHECK-PWR8-NEXT:    slwi r3, r3, 28
 ; CHECK-PWR8-NEXT:    srawi r4, r4, 28
 ; CHECK-PWR8-NEXT:    srawi r3, r3, 28
-; CHECK-PWR8-NEXT:    extsw r4, r4
-; CHECK-PWR8-NEXT:    extsw r3, r3
 ; CHECK-PWR8-NEXT:    cmpw r3, r4
 ; CHECK-PWR8-NEXT:    sub r5, r4, r3
 ; CHECK-PWR8-NEXT:    li r3, -1
diff --git a/llvm/test/CodeGen/PowerPC/select-constant-xor.ll b/llvm/test/CodeGen/PowerPC/select-constant-xor.ll
index 52a763262f59b..b40a21b82e836 100644
--- a/llvm/test/CodeGen/PowerPC/select-constant-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/select-constant-xor.ll
@@ -42,7 +42,6 @@ define i64 @selecti32i64(i32 %a) {
 ; CHECK-LABEL: selecti32i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    srawi 3, 3, 31
-; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    xori 3, 3, 65535
 ; CHECK-NEXT:    xoris 3, 3, 32767
 ; CHECK-NEXT:    blr
@@ -69,7 +68,6 @@ define i32 @selecti32i32(i32 %a) {
 ; CHECK-LABEL: selecti32i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    srawi 3, 3, 31
-; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    xori 3, 3, 84
 ; CHECK-NEXT:    blr
   %c = icmp sgt i32 %a, -1
@@ -81,7 +79,6 @@ define i8 @selecti32i8(i32 %a) {
 ; CHECK-LABEL: selecti32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    srawi 3, 3, 31
-; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    xori 3, 3, 84
 ; CHECK-NEXT:    blr
   %c = icmp sgt i32 %a, -1
@@ -94,7 +91,6 @@ define i32 @selecti8i32(i8 %a) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    extsb 3, 3
 ; CHECK-NEXT:    srawi 3, 3, 7
-; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    xori 3, 3, 84
 ; CHECK-NEXT:    blr
   %c = icmp sgt i8 %a, -1
diff --git a/llvm/test/CodeGen/PowerPC/store-forward-be64.ll b/llvm/test/CodeGen/PowerPC/store-forward-be64.ll
index 720fec96eddc2..32e67c7ce127a 100644
--- a/llvm/test/CodeGen/PowerPC/store-forward-be64.ll
+++ b/llvm/test/CodeGen/PowerPC/store-forward-be64.ll
@@ -51,7 +51,6 @@ define signext i32 @stc1(ptr noundef byval(%struct.SST) align 8 %s) {
 ; CHECK-NEXT:    std 4, 48(1)
 ; CHECK-NEXT:    extsh 3, 3
 ; CHECK-NEXT:    srawi 3, 3, 8
-; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    blr
 entry:
   %0 = load i16, ptr %s, align 8
>From 75494c3abb5cd6fa01b0429257e7ee839092d41b Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 1 May 2024 14:11:35 -0400
Subject: [PATCH 04/15] delete select pattern for SRAW8
---
 llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index ede20e3876b04..ab078ecc81b66 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -935,7 +935,7 @@ defm SRW8  : XForm_6r<31, 536, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
 
 defm SRAW8 : XForm_6rc<31, 792, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
                       "sraw", "$RA, $RST, $RB", IIC_IntShift,
-                      [(set i64:$RA, (PPCsra i64:$RST, i64:$RB))]>, SExt32To64;
+                      []>, SExt32To64;
 
 defm SRAWI8 : XForm_10rc<31, 824, (outs g8rc:$RA), (ins g8rc:$RST, u5imm:$RB),
             "srawi", "$RA, $RST, $RB", IIC_IntShift, []>, SExt32To64;
>From 489ccccffddb8f2ebb69d0da8b4cfc0c0b7fb7e3 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 1 May 2024 17:15:34 -0400
Subject: [PATCH 05/15] address comment
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp  | 53 +++++++++++++++--------
 llvm/lib/Target/PowerPC/PPCInstrInfo.h    |  2 +-
 llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 12 ++++-
 3 files changed, 46 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index e116d19bdf95a..b45800d3d904f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5235,7 +5235,21 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
 // (e.g. AND) to avoid excessive cost.
 const unsigned MAX_BINOP_DEPTH = 1;
 
-void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
+// The `PromoteSignExtendedInstr32To64` function is recursive. The parameter
+// BinOpDepth  does not count all of the recursions. The parameter BinOpDepth is
+// incremented  only when `PromoteSignExtendedInstr32To64` calls itself more
+// than once. This is done to prevent exponential recursion. The function will
+// promote the instruction which defines the register `Reg` in the parameter
+// from a 32-bit to a 64-bit instruction if needed. Additionally, all the used
+// and defined registers in the instruction may also need to be promoted from
+// 32-bit to 64-bit based on the promoted instruction description. If a used
+// register is promoted to 64-bit, the instruction which defines the promoted
+// register also needs to be promoted. After an instruction is promoted to 64
+// bits, the defined register of the promoted instruction is also 64-bit. A
+// defined register may be used by other instructions; in such cases,
+//  we need to extract the 32-bit register used by other
+//  non-promoted 32-bit instructions from the promoted 64-bit register.
+void PPCInstrInfo::PromoteSignExtendedInstr32To64(const Register &Reg,
                                                   MachineRegisterInfo *MRI,
                                                   unsigned BinOpDepth,
                                                   LiveVariables *LV) const {
@@ -5244,13 +5258,13 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
     return;
 
   unsigned Opcode = MI->getOpcode();
-  bool IsReplaceInstr = false;
+  bool IsPromotedInstr = false;
   int NewOpcode = -1;
 
   auto SetNewOpcode = [&](int NewOpc) {
-    if (!IsReplaceInstr) {
+    if (!IsPromotedInstr) {
       NewOpcode = NewOpc;
-      IsReplaceInstr = true;
+      IsPromotedInstr = true;
     }
   };
 
@@ -5270,13 +5284,13 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
         OperandStride = 2;
       }
 
-      for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
+      for (unsigned I = 1; I < OperandEnd; I += OperandStride) {
         assert(MI->getOperand(I).isReg() && "Operand must be register");
         Register SrcReg = MI->getOperand(I).getReg();
-        replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth + 1, LV);
+        PromoteSignExtendedInstr32To64(SrcReg, MRI, BinOpDepth + 1, LV);
       }
 
-      if (!IsReplaceInstr)
+      if (!IsPromotedInstr)
         return;
     }
     break;
@@ -5284,7 +5298,7 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
     Register SrcReg = MI->getOperand(1).getReg();
     const MachineFunction *MF = MI->getMF();
     if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
-      replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
+      PromoteSignExtendedInstr32To64(SrcReg, MRI, BinOpDepth, LV);
       return;
     }
     // From here on everything is SVR4ABI
@@ -5292,7 +5306,7 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
       return;
 
     if (SrcReg != PPC::X3) {
-      replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
+      PromoteSignExtendedInstr32To64(SrcReg, MRI, BinOpDepth, LV);
       return;
     }
   }
@@ -5314,9 +5328,10 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
   case PPC::ORIS8:
   case PPC::XORIS8: {
     Register SrcReg = MI->getOperand(1).getReg();
-    replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
-
-    if (!IsReplaceInstr)
+    PromoteSignExtendedInstr32To64(SrcReg, MRI, BinOpDepth, LV);
+    // If Opcode is PPC::ORI8, PPC::XORI8, PPC::ORIS8, or PPC::XORIS8,
+    // the instruction does not need to be promoted.
+    if (!IsPromotedInstr)
       return;
     break;
   }
@@ -5326,10 +5341,11 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
   case PPC::AND8: {
     if (BinOpDepth < MAX_BINOP_DEPTH) {
       Register SrcReg1 = MI->getOperand(1).getReg();
-      replaceInstrAfterElimExt32To64(SrcReg1, MRI, BinOpDepth, LV);
+      PromoteSignExtendedInstr32To64(SrcReg1, MRI, BinOpDepth, LV);
       Register SrcReg2 = MI->getOperand(2).getReg();
-      replaceInstrAfterElimExt32To64(SrcReg2, MRI, BinOpDepth, LV);
-      if (!IsReplaceInstr)
+      PromoteSignExtendedInstr32To64(SrcReg2, MRI, BinOpDepth, LV);
+      // If Opcode is PPC::AND8, the instruction does not need to be promoted.
+      if (!IsPromotedInstr)
         return;
     }
     break;
@@ -5360,14 +5376,14 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
       MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
   if ((definedBySignExtendingOp(Reg, MRI) && !TII->isZExt32To64(Opcode) &&
        !isOpZeroOfSubwordPreincLoad(Opcode)) ||
-      IsReplaceInstr) {
+      IsPromotedInstr) {
 
     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
 
     if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
       return;
 
-    if (!IsReplaceInstr)
+    if (!IsPromotedInstr)
       NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);
 
     assert(NewOpcode != -1 &&
@@ -5388,7 +5404,7 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
     auto MBB = MI->getParent();
 
     // Since the pseudo-opcode of the instruction is promoted from 32-bit to
-    // 64-bit, if the operand of the original instruction belongs to
+    // 64-bit, if the operand reg class of the original instruction belongs to
     // PPC::GRCRegClass or PPC::GPRC_and_GPRC_NOR0RegClass, we need to promote
     // the operand to PPC::G8CRegClass or PPC::G8RC_and_G8RC_NOR0RegClass,
     // respectively.
@@ -5438,6 +5454,7 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
       LV->recomputeForSingleDefVirtReg(Iter->second);
     MI->eraseFromParent();
 
+    // Demote the 64-bit defined regster to a 32-bit register.
     BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
         .addReg(NewReg, RegState::Kill, PPC::sub_32);
     LV->recomputeForSingleDefVirtReg(NewReg);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 032904437ab07..1412a67ca537f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -625,7 +625,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                       const MachineRegisterInfo *MRI) const {
     return isSignOrZeroExtended(Reg, 0, MRI).second;
   }
-  void replaceInstrAfterElimExt32To64(const Register &Reg,
+  void PromoteSignExtendedInstr32To64(const Register &Reg,
                                       MachineRegisterInfo *MRI,
                                       unsigned BinOpDepth,
                                       LiveVariables *LV) const;
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 50d57d5148cf0..9a6884175a5a1 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1050,8 +1050,16 @@ bool PPCMIPeephole::simplifyCode() {
         } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
                    TII->isSignExtended(NarrowReg, MRI)) {
           // We can eliminate EXTSW if the input is known to be already
-          // sign-extended.
-          TII->replaceInstrAfterElimExt32To64(NarrowReg, MRI, 0, LV);
+          // sign-extended. but we are not sure whether a spill will occur
+          // during register allocation. All these instructions in the chain
+          // used to deduce sign extension to eliminate the 'extsw' will need to
+          // be promoted to 64-bit pseudo instructions when the 'extsw' is
+          // eliminated. If there is no promotion, it will use the 'stw' instead
+          // of 'std', and 'lwz' instead of 'ld' when spilling, since the
+          // register class is 32-bits. Consequently, the high 32-bit
+          // information will be lost.
+          TII->PromoteSignExtendedInstr32To64(NarrowReg, MRI, 0, LV);
+
           LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
           Register TmpReg =
               MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
>From 1ece68c0f9f2b1f574015e39bd7fd68bb346c577 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 2 May 2024 10:03:16 -0400
Subject: [PATCH 06/15] change function name and variable name
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp  | 55 ++++++++++++-----------
 llvm/lib/Target/PowerPC/PPCInstrInfo.h    |  2 +-
 llvm/lib/Target/PowerPC/PPCMIPeephole.cpp |  2 +-
 3 files changed, 32 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index b45800d3d904f..bbd7e7c69112e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5235,9 +5235,9 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
 // (e.g. AND) to avoid excessive cost.
 const unsigned MAX_BINOP_DEPTH = 1;
 
-// The `PromoteSignExtendedInstr32To64` function is recursive. The parameter
+// The `PromoteInstr32To64ForEmliEXTSW` function is recursive. The parameter
 // BinOpDepth  does not count all of the recursions. The parameter BinOpDepth is
-// incremented  only when `PromoteSignExtendedInstr32To64` calls itself more
+// incremented  only when `PromoteInstr32To64ForEmliEXTSW` calls itself more
 // than once. This is done to prevent exponential recursion. The function will
 // promote the instruction which defines the register `Reg` in the parameter
 // from a 32-bit to a 64-bit instruction if needed. Additionally, all the used
@@ -5249,7 +5249,7 @@ const unsigned MAX_BINOP_DEPTH = 1;
 // defined register may be used by other instructions; in such cases,
 //  we need to extract the 32-bit register used by other
 //  non-promoted 32-bit instructions from the promoted 64-bit register.
-void PPCInstrInfo::PromoteSignExtendedInstr32To64(const Register &Reg,
+void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
                                                   MachineRegisterInfo *MRI,
                                                   unsigned BinOpDepth,
                                                   LiveVariables *LV) const {
@@ -5258,22 +5258,27 @@ void PPCInstrInfo::PromoteSignExtendedInstr32To64(const Register &Reg,
     return;
 
   unsigned Opcode = MI->getOpcode();
-  bool IsPromotedInstr = false;
+  bool IsNonSignedExtInstrPromoted = false;
   int NewOpcode = -1;
 
-  auto SetNewOpcode = [&](int NewOpc) {
-    if (!IsPromotedInstr) {
+  auto CheckAndSetNewOpcode = [&](int NewOpc) {
+    if (!IsNonSignedExtInstrPromoted) {
       NewOpcode = NewOpc;
-      IsPromotedInstr = true;
+      IsNonSignedExtInstrPromoted = true;
     }
   };
 
+  auto SetNewOpcode = [&](int NewOpc) {
+    NewOpcode = NewOpc;
+    IsNonSignedExtInstrPromoted = true;
+  };
+
   switch (Opcode) {
   case PPC::OR:
-    SetNewOpcode(PPC::OR8);
+    CheckAndSetNewOpcode(PPC::OR8);
     [[fallthrough]];
   case PPC::ISEL:
-    SetNewOpcode(PPC::ISEL8);
+    CheckAndSetNewOpcode(PPC::ISEL8);
     [[fallthrough]];
   case PPC::OR8:
   case PPC::PHI:
@@ -5287,10 +5292,10 @@ void PPCInstrInfo::PromoteSignExtendedInstr32To64(const Register &Reg,
       for (unsigned I = 1; I < OperandEnd; I += OperandStride) {
         assert(MI->getOperand(I).isReg() && "Operand must be register");
         Register SrcReg = MI->getOperand(I).getReg();
-        PromoteSignExtendedInstr32To64(SrcReg, MRI, BinOpDepth + 1, LV);
+        PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth + 1, LV);
       }
 
-      if (!IsPromotedInstr)
+      if (!IsNonSignedExtInstrPromoted)
         return;
     }
     break;
@@ -5298,7 +5303,7 @@ void PPCInstrInfo::PromoteSignExtendedInstr32To64(const Register &Reg,
     Register SrcReg = MI->getOperand(1).getReg();
     const MachineFunction *MF = MI->getMF();
     if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
-      PromoteSignExtendedInstr32To64(SrcReg, MRI, BinOpDepth, LV);
+      PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth, LV);
       return;
     }
     // From here on everything is SVR4ABI
@@ -5306,46 +5311,46 @@ void PPCInstrInfo::PromoteSignExtendedInstr32To64(const Register &Reg,
       return;
 
     if (SrcReg != PPC::X3) {
-      PromoteSignExtendedInstr32To64(SrcReg, MRI, BinOpDepth, LV);
+      PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth, LV);
       return;
     }
   }
     return;
   case PPC::ORI:
-    SetNewOpcode(PPC::ORI8);
+    CheckAndSetNewOpcode(PPC::ORI8);
     [[fallthrough]];
   case PPC::XORI:
-    SetNewOpcode(PPC::XORI8);
+    CheckAndSetNewOpcode(PPC::XORI8);
     [[fallthrough]];
   case PPC::ORIS:
-    SetNewOpcode(PPC::ORIS8);
+    CheckAndSetNewOpcode(PPC::ORIS8);
     [[fallthrough]];
   case PPC::XORIS:
-    SetNewOpcode(PPC::XORIS8);
+    CheckAndSetNewOpcode(PPC::XORIS8);
     [[fallthrough]];
   case PPC::ORI8:
   case PPC::XORI8:
   case PPC::ORIS8:
   case PPC::XORIS8: {
     Register SrcReg = MI->getOperand(1).getReg();
-    PromoteSignExtendedInstr32To64(SrcReg, MRI, BinOpDepth, LV);
+    PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth, LV);
     // If Opcode is PPC::ORI8, PPC::XORI8, PPC::ORIS8, or PPC::XORIS8,
     // the instruction does not need to be promoted.
-    if (!IsPromotedInstr)
+    if (!IsNonSignedExtInstrPromoted)
       return;
     break;
   }
   case PPC::AND:
-    SetNewOpcode(PPC::AND8);
+    CheckAndSetNewOpcode(PPC::AND8);
     [[fallthrough]];
   case PPC::AND8: {
     if (BinOpDepth < MAX_BINOP_DEPTH) {
       Register SrcReg1 = MI->getOperand(1).getReg();
-      PromoteSignExtendedInstr32To64(SrcReg1, MRI, BinOpDepth, LV);
+      PromoteInstr32To64ForEmliEXTSW(SrcReg1, MRI, BinOpDepth, LV);
       Register SrcReg2 = MI->getOperand(2).getReg();
-      PromoteSignExtendedInstr32To64(SrcReg2, MRI, BinOpDepth, LV);
+      PromoteInstr32To64ForEmliEXTSW(SrcReg2, MRI, BinOpDepth, LV);
       // If Opcode is PPC::AND8, the instruction does not need to be promoted.
-      if (!IsPromotedInstr)
+      if (!IsNonSignedExtInstrPromoted)
         return;
     }
     break;
@@ -5376,14 +5381,14 @@ void PPCInstrInfo::PromoteSignExtendedInstr32To64(const Register &Reg,
       MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
   if ((definedBySignExtendingOp(Reg, MRI) && !TII->isZExt32To64(Opcode) &&
        !isOpZeroOfSubwordPreincLoad(Opcode)) ||
-      IsPromotedInstr) {
+      IsNonSignedExtInstrPromoted) {
 
     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
 
     if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
       return;
 
-    if (!IsPromotedInstr)
+    if (!IsNonSignedExtInstrPromoted)
       NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);
 
     assert(NewOpcode != -1 &&
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 1412a67ca537f..cdf533e78e496 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -625,7 +625,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                       const MachineRegisterInfo *MRI) const {
     return isSignOrZeroExtended(Reg, 0, MRI).second;
   }
-  void PromoteSignExtendedInstr32To64(const Register &Reg,
+  void PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
                                       MachineRegisterInfo *MRI,
                                       unsigned BinOpDepth,
                                       LiveVariables *LV) const;
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 9a6884175a5a1..896ed100e4780 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1058,7 +1058,7 @@ bool PPCMIPeephole::simplifyCode() {
           // of 'std', and 'lwz' instead of 'ld' when spilling, since the
           // register class is 32-bits. Consequently, the high 32-bit
           // information will be lost.
-          TII->PromoteSignExtendedInstr32To64(NarrowReg, MRI, 0, LV);
+          TII->PromoteInstr32To64ForEmliEXTSW(NarrowReg, MRI, 0, LV);
 
           LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
           Register TmpReg =
>From 56b04f5100df0c1af4805e187af24c9685e0bf41 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 8 May 2024 14:04:01 -0400
Subject: [PATCH 07/15] add more comment
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index bbd7e7c69112e..b8bb4f677a5bc 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5396,12 +5396,15 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
 
     const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
     const MCInstrDesc &MCID = TII->get(NewOpcode);
-
-    Register SrcReg = MI->getOperand(0).getReg();
     const TargetRegisterClass *NewRC =
         TRI->getRegClass(MCID.operands()[0].RegClass);
+
+    Register SrcReg = MI->getOperand(0).getReg();
     const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
 
+    // If the register class of the defined register in the 32-bit instruction
+    // is the same as the register class of the defined register in the promoted
+    // 64-bit instruction, we do not need to promote the instruction.
     if (NewRC == SrcRC)
       return;
 
@@ -5422,14 +5425,15 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
         if (!OperandReg.isVirtual())
           continue;
 
-        const TargetRegisterClass *RC =
+        const TargetRegisterClass *NewUsedRegRC =
             TRI->getRegClass(MCID.operands()[i].RegClass);
         const TargetRegisterClass *OrgRC = MRI->getRegClass(OperandReg);
-        if (RC != MRI->getRegClass(OperandReg) &&
+        if (NewUsedRegRC != OrgRC &&
             (OrgRC == &PPC::GPRCRegClass ||
              OrgRC == &PPC::GPRC_and_GPRC_NOR0RegClass)) {
-          Register TmpReg = MRI->createVirtualRegister(RC);
-          Register DstTmpReg = MRI->createVirtualRegister(RC);
+          // Promote the used 32-bit register to 64-bit register.
+          Register TmpReg = MRI->createVirtualRegister(NewUsedRegRC);
+          Register DstTmpReg = MRI->createVirtualRegister(NewUsedRegRC);
           BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
           BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
               .addReg(TmpReg)
@@ -5443,9 +5447,9 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
       }
     }
 
-    Register NewReg = MRI->createVirtualRegister(NewRC);
+    Register NewDefinedReg = MRI->createVirtualRegister(NewRC);
 
-    BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewReg);
+    BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewDefinedReg);
     MachineBasicBlock::instr_iterator Iter(MI);
     --Iter;
     for (unsigned i = 1; i < MI->getNumOperands(); i++)
@@ -5461,8 +5465,8 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
 
     // Demote the 64-bit defined regster to a 32-bit register.
     BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
-        .addReg(NewReg, RegState::Kill, PPC::sub_32);
-    LV->recomputeForSingleDefVirtReg(NewReg);
+        .addReg(NewDefinedReg, RegState::Kill, PPC::sub_32);
+    LV->recomputeForSingleDefVirtReg(NewDefinedReg);
     return;
   }
   return;
>From 692138bae12d156802e759354c68af93cb76130b Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 9 May 2024 11:38:24 -0400
Subject: [PATCH 08/15] modify comment
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp  | 60 +++++++++++++----------
 llvm/lib/Target/PowerPC/PPCMIPeephole.cpp | 16 +++---
 2 files changed, 43 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index b8bb4f677a5bc..71a13cb73b9f3 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5235,20 +5235,15 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
 // (e.g. AND) to avoid excessive cost.
 const unsigned MAX_BINOP_DEPTH = 1;
 
-// The `PromoteInstr32To64ForEmliEXTSW` function is recursive. The parameter
-// BinOpDepth  does not count all of the recursions. The parameter BinOpDepth is
+// The function will promote the instruction which defines the register `Reg`
+// in the parameter from a 32-bit to a 64-bit instruction if needed. The logic
+// used to check whether an instruction needs to be promoted or not is similar
+// to the logic used to check a defined register whether is isSignOrZeroExtended
+// or not in the function PPCInstrInfo::isSignOrZeroExtended. The
+// `PromoteInstr32To64ForEmliEXTSW` function is recursive. The parameter
+// BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
 // incremented  only when `PromoteInstr32To64ForEmliEXTSW` calls itself more
-// than once. This is done to prevent exponential recursion. The function will
-// promote the instruction which defines the register `Reg` in the parameter
-// from a 32-bit to a 64-bit instruction if needed. Additionally, all the used
-// and defined registers in the instruction may also need to be promoted from
-// 32-bit to 64-bit based on the promoted instruction description. If a used
-// register is promoted to 64-bit, the instruction which defines the promoted
-// register also needs to be promoted. After an instruction is promoted to 64
-// bits, the defined register of the promoted instruction is also 64-bit. A
-// defined register may be used by other instructions; in such cases,
-//  we need to extract the 32-bit register used by other
-//  non-promoted 32-bit instructions from the promoted 64-bit register.
+// than once. This is done to prevent exponential recursion.
 void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
                                                   MachineRegisterInfo *MRI,
                                                   unsigned BinOpDepth,
@@ -5258,19 +5253,19 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
     return;
 
   unsigned Opcode = MI->getOpcode();
-  bool IsNonSignedExtInstrPromoted = false;
+  bool IsNonSignedExtInstrNeedPromoted = false;
   int NewOpcode = -1;
 
   auto CheckAndSetNewOpcode = [&](int NewOpc) {
-    if (!IsNonSignedExtInstrPromoted) {
+    if (!IsNonSignedExtInstrNeedPromoted) {
       NewOpcode = NewOpc;
-      IsNonSignedExtInstrPromoted = true;
+      IsNonSignedExtInstrNeedPromoted = true;
     }
   };
 
   auto SetNewOpcode = [&](int NewOpc) {
     NewOpcode = NewOpc;
-    IsNonSignedExtInstrPromoted = true;
+    IsNonSignedExtInstrNeedPromoted = true;
   };
 
   switch (Opcode) {
@@ -5295,22 +5290,31 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
         PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth + 1, LV);
       }
 
-      if (!IsNonSignedExtInstrPromoted)
+      if (!IsNonSignedExtInstrNeedPromoted)
         return;
     }
     break;
   case PPC::COPY: {
+    // Refer to the logic of the `case PPC::COPY` statement in the function
+    // PPCInstrInfo::isSignOrZeroExtended().
+
     Register SrcReg = MI->getOperand(1).getReg();
+    // In both ELFv1 and v2 ABI, method parameters and the return value
+    // are sign- or zero-extended.
     const MachineFunction *MF = MI->getMF();
     if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
+      // If this is a copy from another register, we recursively promote source.
       PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth, LV);
       return;
     }
-    // From here on everything is SVR4ABI
+    // From here on everything is SVR4ABI. COPY will be eliminated in other
+    // pass, we do not need promote COPY pseduo opcode.
+
     if (MI->getParent()->getBasicBlock() == &MF->getFunction().getEntryBlock())
       return;
 
     if (SrcReg != PPC::X3) {
+      // If this is a copy from another register, we recursively promote source.
       PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth, LV);
       return;
     }
@@ -5336,7 +5340,7 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
     PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth, LV);
     // If Opcode is PPC::ORI8, PPC::XORI8, PPC::ORIS8, or PPC::XORIS8,
     // the instruction does not need to be promoted.
-    if (!IsNonSignedExtInstrPromoted)
+    if (!IsNonSignedExtInstrNeedPromoted)
       return;
     break;
   }
@@ -5350,7 +5354,7 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
       Register SrcReg2 = MI->getOperand(2).getReg();
       PromoteInstr32To64ForEmliEXTSW(SrcReg2, MRI, BinOpDepth, LV);
       // If Opcode is PPC::AND8, the instruction does not need to be promoted.
-      if (!IsNonSignedExtInstrPromoted)
+      if (!IsNonSignedExtInstrNeedPromoted)
         return;
     }
     break;
@@ -5381,14 +5385,17 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
       MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
   if ((definedBySignExtendingOp(Reg, MRI) && !TII->isZExt32To64(Opcode) &&
        !isOpZeroOfSubwordPreincLoad(Opcode)) ||
-      IsNonSignedExtInstrPromoted) {
+      IsNonSignedExtInstrNeedPromoted) {
 
     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
 
     if (RC == &PPC::G8RCRegClass || RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
       return;
 
-    if (!IsNonSignedExtInstrPromoted)
+    // The TableGen function `get64BitInstrFromSignedExt32BitInstr` is used to
+    // map the 32-bit instruction with the `SExt32To64` flag to the 64-bit
+    // instruction with the same opcode.
+    if (!IsNonSignedExtInstrNeedPromoted)
       NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);
 
     assert(NewOpcode != -1 &&
@@ -5412,7 +5419,7 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
     auto MBB = MI->getParent();
 
     // Since the pseudo-opcode of the instruction is promoted from 32-bit to
-    // 64-bit, if the operand reg class of the original instruction belongs to
+    // 64-bit, if the source reg class of the original instruction belongs to
     // PPC::GRCRegClass or PPC::GPRC_and_GPRC_NOR0RegClass, we need to promote
     // the operand to PPC::G8CRegClass or PPC::G8RC_and_G8RC_NOR0RegClass,
     // respectively.
@@ -5463,7 +5470,10 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
       LV->recomputeForSingleDefVirtReg(Iter->second);
     MI->eraseFromParent();
 
-    // Demote the 64-bit defined regster to a 32-bit register.
+    // A defined register may be used by other instructions that are 32-bit.
+    // After the defined register is promoted to 64-bit for the promoted
+    // instruction, we need to demote the 64-bit defined register back to a
+    // 32-bit register.
     BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
         .addReg(NewDefinedReg, RegState::Kill, PPC::sub_32);
     LV->recomputeForSingleDefVirtReg(NewDefinedReg);
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 896ed100e4780..b7b93335aaf5b 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1050,14 +1050,14 @@ bool PPCMIPeephole::simplifyCode() {
         } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
                    TII->isSignExtended(NarrowReg, MRI)) {
           // We can eliminate EXTSW if the input is known to be already
-          // sign-extended. but we are not sure whether a spill will occur
-          // during register allocation. All these instructions in the chain
-          // used to deduce sign extension to eliminate the 'extsw' will need to
-          // be promoted to 64-bit pseudo instructions when the 'extsw' is
-          // eliminated. If there is no promotion, it will use the 'stw' instead
-          // of 'std', and 'lwz' instead of 'ld' when spilling, since the
-          // register class is 32-bits. Consequently, the high 32-bit
-          // information will be lost.
+          // sign-extended. However, we are not sure whether a spill will occur
+          // during register allocation. If there is no promotion, it will use
+          // 'stw' instead of 'std', and 'lwz' instead of 'ld' when spilling,
+          // since the register class is 32-bits. Consequently, the high 32-bit
+          // information will be lost. Therefore, all these instructions in the
+          // chain used to deduce sign extension to eliminate the 'extsw' will
+          // need to be promoted to 64-bit pseudo instructions when the 'extsw'
+          // is eliminated.
           TII->PromoteInstr32To64ForEmliEXTSW(NarrowReg, MRI, 0, LV);
 
           LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
>From 95267f514d515e62319affa89e3606ebeaf39866 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 15 May 2024 10:45:28 -0400
Subject: [PATCH 09/15] delete some unnecessary instruction promotion
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      | 27 +------------------
 .../convert-rr-to-ri-instrs-out-of-range.mir  |  2 +-
 .../PowerPC/convert-rr-to-ri-instrs.mir       |  2 +-
 3 files changed, 3 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 71a13cb73b9f3..ea2cfa78e4626 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5263,11 +5263,6 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
     }
   };
 
-  auto SetNewOpcode = [&](int NewOpc) {
-    NewOpcode = NewOpc;
-    IsNonSignedExtInstrNeedPromoted = true;
-  };
-
   switch (Opcode) {
   case PPC::OR:
     CheckAndSetNewOpcode(PPC::OR8);
@@ -5359,33 +5354,13 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
     }
     break;
   }
-  case PPC::RLWINM:
-    SetNewOpcode(PPC::RLWINM8);
-    break;
-  case PPC::RLWINM_rec:
-    SetNewOpcode(PPC::RLWINM8_rec);
-    break;
-  case PPC::RLWNM:
-    SetNewOpcode(PPC ::RLWNM8);
-    break;
-  case PPC::RLWNM_rec:
-    SetNewOpcode(PPC::RLWNM8_rec);
-    break;
-  case PPC::ANDC_rec:
-    SetNewOpcode(PPC::ANDC8_rec);
-    break;
-  case PPC::ANDIS_rec:
-    SetNewOpcode(PPC::ANDIS8_rec);
-    break;
   default:
     break;
   }
 
   const PPCInstrInfo *TII =
       MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
-  if ((definedBySignExtendingOp(Reg, MRI) && !TII->isZExt32To64(Opcode) &&
-       !isOpZeroOfSubwordPreincLoad(Opcode)) ||
-      IsNonSignedExtInstrNeedPromoted) {
+  if (TII->isSExt32To64(Opcode) || IsNonSignedExtInstrNeedPromoted) {
 
     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
 
diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
index cd48034d61a12..cdd6be56b46d5 100644
--- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
+++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
@@ -256,7 +256,7 @@ body:             |
     %3 = IMPLICIT_DEF
     %2 = LI 170
     %4 = RLWNM killed %1, %2, 20, 27
-    ; CHECK: RLWINM8 killed %6, 10, 20, 27
+    ; CHECK: RLWINM killed %1, 10, 20, 27
     ; CHECK-LATE: rlwinm 3, 3, 10, 20, 27
     $x3 = EXTSW_32_64 %4
     BLR8 implicit $lr8, implicit $rm, implicit $x3
diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
index 5ffdefe1960fc..fa06dd551a0d4 100644
--- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
+++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
@@ -4873,7 +4873,7 @@ body:             |
     %2 = LI 8
     %5 = COPY %0.sub_32
     %8 = SRW killed %5, killed %2
-    ; CHECK: RLWINM8 killed %10, 24, 8, 31
+    ; CHECK: RLWINM killed %5, 24, 8, 31
     ; CHECK-LATE: srwi 3, 3, 8
     $x3 = EXTSW_32_64 %8
     BLR8 implicit $lr8, implicit $rm, implicit $x3
>From 4482b299f726761791d1dd3de4ced054d7a3a08a Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 15 May 2024 11:18:28 -0400
Subject: [PATCH 10/15] polish the code
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index ea2cfa78e4626..032fc5a3d9b4c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5284,9 +5284,6 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
         Register SrcReg = MI->getOperand(I).getReg();
         PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth + 1, LV);
       }
-
-      if (!IsNonSignedExtInstrNeedPromoted)
-        return;
     }
     break;
   case PPC::COPY: {
@@ -5302,19 +5299,15 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
       PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth, LV);
       return;
     }
+
     // From here on everything is SVR4ABI. COPY will be eliminated in other
     // pass, we do not need promote COPY pseduo opcode.
 
-    if (MI->getParent()->getBasicBlock() == &MF->getFunction().getEntryBlock())
-      return;
-
-    if (SrcReg != PPC::X3) {
+    if (SrcReg != PPC::X3)
       // If this is a copy from another register, we recursively promote source.
       PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth, LV);
-      return;
-    }
-  }
     return;
+  }
   case PPC::ORI:
     CheckAndSetNewOpcode(PPC::ORI8);
     [[fallthrough]];
@@ -5333,10 +5326,6 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
   case PPC::XORIS8: {
     Register SrcReg = MI->getOperand(1).getReg();
     PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth, LV);
-    // If Opcode is PPC::ORI8, PPC::XORI8, PPC::ORIS8, or PPC::XORIS8,
-    // the instruction does not need to be promoted.
-    if (!IsNonSignedExtInstrNeedPromoted)
-      return;
     break;
   }
   case PPC::AND:
@@ -5348,9 +5337,6 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
       PromoteInstr32To64ForEmliEXTSW(SrcReg1, MRI, BinOpDepth, LV);
       Register SrcReg2 = MI->getOperand(2).getReg();
       PromoteInstr32To64ForEmliEXTSW(SrcReg2, MRI, BinOpDepth, LV);
-      // If Opcode is PPC::AND8, the instruction does not need to be promoted.
-      if (!IsNonSignedExtInstrNeedPromoted)
-        return;
     }
     break;
   }
@@ -5452,7 +5438,6 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
     BuildMI(*MBB, ++Iter, DL, TII->get(PPC::COPY), SrcReg)
         .addReg(NewDefinedReg, RegState::Kill, PPC::sub_32);
     LV->recomputeForSingleDefVirtReg(NewDefinedReg);
-    return;
   }
   return;
 }
>From 8e98ab8e21ee57a9a6e5c9194c02cefe0a3f63cf Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 15 May 2024 12:34:43 -0400
Subject: [PATCH 11/15] polish the code
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 032fc5a3d9b4c..8950cbc6ec650 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5385,7 +5385,6 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
     // the operand to PPC::G8CRegClass or PPC::G8RC_and_G8RC_NOR0RegClass,
     // respectively.
     DenseMap<unsigned, Register> PromoteRegs;
-    DenseMap<unsigned, Register> ReCalRegs;
     for (unsigned i = 1; i < MI->getNumOperands(); i++) {
       MachineOperand &Operand = MI->getOperand(i);
       if (Operand.isReg()) {
@@ -5408,9 +5407,6 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
               .addReg(OperandReg)
               .addImm(PPC::sub_32);
           PromoteRegs[i] = DstTmpReg;
-          ReCalRegs[i] = DstTmpReg;
-        } else {
-          ReCalRegs[i] = OperandReg;
         }
       }
     }
@@ -5420,15 +5416,24 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
     BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewDefinedReg);
     MachineBasicBlock::instr_iterator Iter(MI);
     --Iter;
-    for (unsigned i = 1; i < MI->getNumOperands(); i++)
+    MachineInstrBuilder  MIBuilder(*Iter->getMF(), Iter);
+    for (unsigned i = 1; i < MI->getNumOperands(); i++) {
       if (PromoteRegs.find(i) != PromoteRegs.end())
-        MachineInstrBuilder(*Iter->getMF(), Iter)
-            .addReg(PromoteRegs[i], RegState::Kill);
+        MIBuilder.addReg(PromoteRegs[i], RegState::Kill);
       else
         Iter->addOperand(MI->getOperand(i));
+    }
+
+    for (unsigned i = 1; i < Iter->getNumOperands(); i++) {
+      MachineOperand &Operand = Iter->getOperand(i);
+      if (Operand.isReg()) {
+        Register OperandReg = Operand.getReg();
+        if (!OperandReg.isVirtual())
+          continue;
+        LV->recomputeForSingleDefVirtReg(OperandReg);
+      }
+    }
 
-    for (auto Iter = ReCalRegs.begin(); Iter != ReCalRegs.end(); Iter++)
-      LV->recomputeForSingleDefVirtReg(Iter->second);
     MI->eraseFromParent();
 
     // A defined register may be used by other instructions that are 32-bit.
>From 9b0b9c84269c7a67fc8ab2e0144e88ec0be1f306 Mon Sep 17 00:00:00 2001
From: zhijian lin <zhijian at ca.ibm.com>
Date: Tue, 28 May 2024 15:48:23 -0400
Subject: [PATCH 12/15] Update llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Co-authored-by: Amy Kwan <amy.kwan1 at ibm.com>
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 8950cbc6ec650..47bd95fd41964 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5235,12 +5235,12 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
 // (e.g. AND) to avoid excessive cost.
 const unsigned MAX_BINOP_DEPTH = 1;
 
-// The function will promote the instruction which defines the register `Reg`
+// This function will promote the instruction which defines the register `Reg`
 // in the parameter from a 32-bit to a 64-bit instruction if needed. The logic
 // used to check whether an instruction needs to be promoted or not is similar
-// to the logic used to check a defined register whether is isSignOrZeroExtended
-// or not in the function PPCInstrInfo::isSignOrZeroExtended. The
-// `PromoteInstr32To64ForEmliEXTSW` function is recursive. The parameter
+// to the logic used to check whether or not a defined register is sign or zero extended
+// within the function PPCInstrInfo::isSignOrZeroExtended. Additionally, the
+// `PromoteInstr32To64ForElimEXTSW` function is recursive. 
 // BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
 // incremented  only when `PromoteInstr32To64ForEmliEXTSW` calls itself more
 // than once. This is done to prevent exponential recursion.
>From 8550317de4c96a7bef3f030a0f7ab609331b97df Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 28 May 2024 16:13:30 -0400
Subject: [PATCH 13/15] address comment
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp  | 34 ++++++++++++-----------
 llvm/lib/Target/PowerPC/PPCInstrInfo.h    |  2 +-
 llvm/lib/Target/PowerPC/PPCMIPeephole.cpp |  2 +-
 3 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 47bd95fd41964..9c22642954d8c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5238,13 +5238,13 @@ const unsigned MAX_BINOP_DEPTH = 1;
 // This function will promote the instruction which defines the register `Reg`
 // in the parameter from a 32-bit to a 64-bit instruction if needed. The logic
 // used to check whether an instruction needs to be promoted or not is similar
-// to the logic used to check whether or not a defined register is sign or zero extended
-// within the function PPCInstrInfo::isSignOrZeroExtended. Additionally, the
-// `PromoteInstr32To64ForElimEXTSW` function is recursive. 
+// to the logic used to check whether or not a defined register is sign or zero
+// extended within the function PPCInstrInfo::isSignOrZeroExtended.
+// Additionally, the `PromoteInstr32To64ForElimEXTSW` function is recursive.
 // BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
-// incremented  only when `PromoteInstr32To64ForEmliEXTSW` calls itself more
+// incremented  only when `PromoteInstr32To64ForElimEXTSW` calls itself more
 // than once. This is done to prevent exponential recursion.
-void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
+void PPCInstrInfo::PromoteInstr32To64ForElimEXTSW(const Register &Reg,
                                                   MachineRegisterInfo *MRI,
                                                   unsigned BinOpDepth,
                                                   LiveVariables *LV) const {
@@ -5282,12 +5282,12 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
       for (unsigned I = 1; I < OperandEnd; I += OperandStride) {
         assert(MI->getOperand(I).isReg() && "Operand must be register");
         Register SrcReg = MI->getOperand(I).getReg();
-        PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth + 1, LV);
+        PromoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth + 1, LV);
       }
     }
     break;
   case PPC::COPY: {
-    // Refer to the logic of the `case PPC::COPY` statement in the function
+    // Refers to the logic of the `case PPC::COPY` statement in the function
     // PPCInstrInfo::isSignOrZeroExtended().
 
     Register SrcReg = MI->getOperand(1).getReg();
@@ -5295,17 +5295,19 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
     // are sign- or zero-extended.
     const MachineFunction *MF = MI->getMF();
     if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
-      // If this is a copy from another register, we recursively promote source.
-      PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth, LV);
+      // If this is a copy from another register, we recursively promote the
+      // source.
+      PromoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
       return;
     }
 
-    // From here on everything is SVR4ABI. COPY will be eliminated in other
-    // pass, we do not need promote COPY pseduo opcode.
+    // From here on everything is SVR4ABI. COPY will be eliminated in another
+    // pass, so we do not need to promote the COPY pseudo opcode.
 
     if (SrcReg != PPC::X3)
-      // If this is a copy from another register, we recursively promote source.
-      PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth, LV);
+      // If this is a copy from another register, we recursively promote the
+      // source.
+      PromoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
     return;
   }
   case PPC::ORI:
@@ -5325,7 +5327,7 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
   case PPC::ORIS8:
   case PPC::XORIS8: {
     Register SrcReg = MI->getOperand(1).getReg();
-    PromoteInstr32To64ForEmliEXTSW(SrcReg, MRI, BinOpDepth, LV);
+    PromoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
     break;
   }
   case PPC::AND:
@@ -5334,9 +5336,9 @@ void PPCInstrInfo::PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
   case PPC::AND8: {
     if (BinOpDepth < MAX_BINOP_DEPTH) {
       Register SrcReg1 = MI->getOperand(1).getReg();
-      PromoteInstr32To64ForEmliEXTSW(SrcReg1, MRI, BinOpDepth, LV);
+      PromoteInstr32To64ForElimEXTSW(SrcReg1, MRI, BinOpDepth, LV);
       Register SrcReg2 = MI->getOperand(2).getReg();
-      PromoteInstr32To64ForEmliEXTSW(SrcReg2, MRI, BinOpDepth, LV);
+      PromoteInstr32To64ForElimEXTSW(SrcReg2, MRI, BinOpDepth, LV);
     }
     break;
   }
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index cdf533e78e496..9b5c076657c20 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -625,7 +625,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                       const MachineRegisterInfo *MRI) const {
     return isSignOrZeroExtended(Reg, 0, MRI).second;
   }
-  void PromoteInstr32To64ForEmliEXTSW(const Register &Reg,
+  void PromoteInstr32To64ForElimEXTSW(const Register &Reg,
                                       MachineRegisterInfo *MRI,
                                       unsigned BinOpDepth,
                                       LiveVariables *LV) const;
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index b7b93335aaf5b..2a6cf3314c80f 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1058,7 +1058,7 @@ bool PPCMIPeephole::simplifyCode() {
           // chain used to deduce sign extension to eliminate the 'extsw' will
           // need to be promoted to 64-bit pseudo instructions when the 'extsw'
           // is eliminated.
-          TII->PromoteInstr32To64ForEmliEXTSW(NarrowReg, MRI, 0, LV);
+          TII->PromoteInstr32To64ForElimEXTSW(NarrowReg, MRI, 0, LV);
 
           LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
           Register TmpReg =
>From 9a4e4d0ab292faa1985f7207e9e1ef9a236135e3 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 13 Jun 2024 11:28:23 -0400
Subject: [PATCH 14/15] minor: change function name from
 PromoteInstr32To64ForElimEXTSW to promoteInstr32To64ForElimEXTSW
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp  | 18 +++++++++---------
 llvm/lib/Target/PowerPC/PPCInstrInfo.h    |  2 +-
 llvm/lib/Target/PowerPC/PPCMIPeephole.cpp |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9c22642954d8c..9b896a6d99e6b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5240,11 +5240,11 @@ const unsigned MAX_BINOP_DEPTH = 1;
 // used to check whether an instruction needs to be promoted or not is similar
 // to the logic used to check whether or not a defined register is sign or zero
 // extended within the function PPCInstrInfo::isSignOrZeroExtended.
-// Additionally, the `PromoteInstr32To64ForElimEXTSW` function is recursive.
+// Additionally, the `promoteInstr32To64ForElimEXTSW` function is recursive.
 // BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
-// incremented  only when `PromoteInstr32To64ForElimEXTSW` calls itself more
+// incremented only when `promoteInstr32To64ForElimEXTSW` calls itself more
 // than once. This is done to prevent exponential recursion.
-void PPCInstrInfo::PromoteInstr32To64ForElimEXTSW(const Register &Reg,
+void PPCInstrInfo::promoteInstr32To64ForElimEXTSW(const Register &Reg,
                                                   MachineRegisterInfo *MRI,
                                                   unsigned BinOpDepth,
                                                   LiveVariables *LV) const {
@@ -5282,7 +5282,7 @@ void PPCInstrInfo::PromoteInstr32To64ForElimEXTSW(const Register &Reg,
       for (unsigned I = 1; I < OperandEnd; I += OperandStride) {
         assert(MI->getOperand(I).isReg() && "Operand must be register");
         Register SrcReg = MI->getOperand(I).getReg();
-        PromoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth + 1, LV);
+        promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth + 1, LV);
       }
     }
     break;
@@ -5297,7 +5297,7 @@ void PPCInstrInfo::PromoteInstr32To64ForElimEXTSW(const Register &Reg,
     if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
       // If this is a copy from another register, we recursively promote the
       // source.
-      PromoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
+      promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
       return;
     }
 
@@ -5307,7 +5307,7 @@ void PPCInstrInfo::PromoteInstr32To64ForElimEXTSW(const Register &Reg,
     if (SrcReg != PPC::X3)
       // If this is a copy from another register, we recursively promote the
       // source.
-      PromoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
+      promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
     return;
   }
   case PPC::ORI:
@@ -5327,7 +5327,7 @@ void PPCInstrInfo::PromoteInstr32To64ForElimEXTSW(const Register &Reg,
   case PPC::ORIS8:
   case PPC::XORIS8: {
     Register SrcReg = MI->getOperand(1).getReg();
-    PromoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
+    promoteInstr32To64ForElimEXTSW(SrcReg, MRI, BinOpDepth, LV);
     break;
   }
   case PPC::AND:
@@ -5336,9 +5336,9 @@ void PPCInstrInfo::PromoteInstr32To64ForElimEXTSW(const Register &Reg,
   case PPC::AND8: {
     if (BinOpDepth < MAX_BINOP_DEPTH) {
       Register SrcReg1 = MI->getOperand(1).getReg();
-      PromoteInstr32To64ForElimEXTSW(SrcReg1, MRI, BinOpDepth, LV);
+      promoteInstr32To64ForElimEXTSW(SrcReg1, MRI, BinOpDepth, LV);
       Register SrcReg2 = MI->getOperand(2).getReg();
-      PromoteInstr32To64ForElimEXTSW(SrcReg2, MRI, BinOpDepth, LV);
+      promoteInstr32To64ForElimEXTSW(SrcReg2, MRI, BinOpDepth, LV);
     }
     break;
   }
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 9b5c076657c20..4a53347ec6714 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -625,7 +625,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                       const MachineRegisterInfo *MRI) const {
     return isSignOrZeroExtended(Reg, 0, MRI).second;
   }
-  void PromoteInstr32To64ForElimEXTSW(const Register &Reg,
+  void promoteInstr32To64ForElimEXTSW(const Register &Reg,
                                       MachineRegisterInfo *MRI,
                                       unsigned BinOpDepth,
                                       LiveVariables *LV) const;
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 2a6cf3314c80f..541e462e8b4cc 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1058,7 +1058,7 @@ bool PPCMIPeephole::simplifyCode() {
           // chain used to deduce sign extension to eliminate the 'extsw' will
           // need to be promoted to 64-bit pseudo instructions when the 'extsw'
           // is eliminated.
-          TII->PromoteInstr32To64ForElimEXTSW(NarrowReg, MRI, 0, LV);
+          TII->promoteInstr32To64ForElimEXTSW(NarrowReg, MRI, 0, LV);
 
           LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
           Register TmpReg =
>From 3798b036470b5930e89e8872be99e2b22de6b620 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 13 Jun 2024 15:14:21 -0400
Subject: [PATCH 15/15] add not Reg.isVirtual() check
---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9b896a6d99e6b..21b52257fecc7 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5248,6 +5248,9 @@ void PPCInstrInfo::promoteInstr32To64ForElimEXTSW(const Register &Reg,
                                                   MachineRegisterInfo *MRI,
                                                   unsigned BinOpDepth,
                                                   LiveVariables *LV) const {
+  if (!Reg.isVirtual())
+    return;
+
   MachineInstr *MI = MRI->getVRegDef(Reg);
   if (!MI)
     return;
    
    
More information about the llvm-commits
mailing list