[llvm] fix a bug of PPCMIPeepholes which description in issue 71030 (PR #85451)

zhijian lin via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 19 10:39:20 PDT 2024


https://github.com/diggerlin updated https://github.com/llvm/llvm-project/pull/85451

>From 0f41554d3b333dc1d1423dbd6303d94c2900f543 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 19 Mar 2024 09:56:43 -0400
Subject: [PATCH 1/4] fixed a bug of PPCMIPeepholes which description in issue
 71030

---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      | 163 ++++
 llvm/lib/Target/PowerPC/PPCInstrInfo.h        |   5 +
 llvm/lib/Target/PowerPC/PPCInstrInfo.td       |   5 +-
 llvm/lib/Target/PowerPC/PPCMIPeephole.cpp     |   1 +
 .../convert-rr-to-ri-instrs-out-of-range.mir  |   4 +-
 .../PowerPC/convert-rr-to-ri-instrs.mir       |  10 +-
 ...ole-replaceInstr-after-eliminate-extsw.mir | 698 ++++++++++++++++++
 llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll    |   2 +
 .../CodeGen/PowerPC/select-constant-xor.ll    |   4 +
 .../test/CodeGen/PowerPC/sext_elimination.mir |  10 +-
 .../PowerPC/stack-restore-with-setjmp.ll      |   4 +-
 .../CodeGen/PowerPC/store-forward-be64.ll     |   1 +
 12 files changed, 894 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 5f5eb31a5a85fa..b260565d64fbbf 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5219,6 +5219,169 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
 // We limit the max depth to track incoming values of PHIs or binary ops
 // (e.g. AND) to avoid excessive cost.
 const unsigned MAX_BINOP_DEPTH = 1;
+
+/// Called once an EXTSW of \p Reg is known to be redundant. Rewrites the
+/// instruction defining \p Reg (and, via OR/PHI/ISEL/AND and the OR/XOR
+/// immediate forms, the definitions of its inputs) to the 64-bit opcode: the
+/// new instruction defines a fresh virtual register and the old destination
+/// becomes a COPY of its sub_32 part. \p BinOpDepth limits the recursion via
+/// MAX_BINOP_DEPTH; \p LV is recomputed for the registers involved.
+void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
+                                                  MachineRegisterInfo *MRI,
+                                                  unsigned BinOpDepth,
+                                                  LiveVariables *LV) const {
+  if (MRI->getRegClass(Reg) == &PPC::G8RCRegClass)
+    return;
+
+  MachineInstr *MI = MRI->getVRegDef(Reg);
+  if (!MI)
+    return;
+
+  unsigned Opcode = MI->getOpcode();
+  bool IsReplaceInstr = false;
+  switch (Opcode) {
+  case PPC::OR:
+  case PPC::OR8:
+  case PPC::PHI:
+  case PPC::ISEL:
+    if (BinOpDepth < MAX_BINOP_DEPTH) {
+      if (Opcode == PPC::OR || Opcode == PPC::ISEL)
+        IsReplaceInstr = true;
+      unsigned OperandEnd = 3, OperandStride = 1;
+      if (Opcode == PPC::PHI) {
+        OperandEnd = MI->getNumOperands();
+        OperandStride = 2;
+      }
+
+      for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
+        assert(MI->getOperand(I).isReg() && "Operand must be register");
+        Register SrcReg = MI->getOperand(I).getReg();
+        replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth + 1, LV);
+      }
+    }
+    break;
+  // NOTE: PPC::COPY sources are not followed; an earlier draft that recursed
+  // into them on non-SVR4 ABIs is disabled.
+  case PPC::ORI:
+  case PPC::XORI:
+  case PPC::ORI8:
+  case PPC::XORI8:
+  case PPC::ORIS:
+  case PPC::XORIS:
+  case PPC::ORIS8:
+  case PPC::XORIS8: {
+    if (Opcode == PPC::ORI || Opcode == PPC::XORI || Opcode == PPC::ORIS ||
+        Opcode == PPC::XORIS)
+      IsReplaceInstr = true;
+    Register SrcReg = MI->getOperand(1).getReg();
+    replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
+    break;
+  }
+  case PPC::AND:
+  case PPC::AND8: {
+    if (BinOpDepth < MAX_BINOP_DEPTH) {
+      if (Opcode == PPC::AND)
+        IsReplaceInstr = true;
+      Register SrcReg1 = MI->getOperand(1).getReg();
+      replaceInstrAfterElimExt32To64(SrcReg1, MRI, BinOpDepth, LV);
+      Register SrcReg2 = MI->getOperand(2).getReg();
+      replaceInstrAfterElimExt32To64(SrcReg2, MRI, BinOpDepth, LV);
+    }
+    break;
+  }
+  default:
+    break;
+  }
+
+  const PPCInstrInfo *TII =
+      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
+  const bool NeedsRewrite =
+      (definedBySignExtendingOp(Reg, MRI) && !TII->isZExt32To64(Opcode) &&
+       !isOpZeroOfSubwordPreincLoad(Opcode)) ||
+      IsReplaceInstr;
+  if (!NeedsRewrite)
+    return;
+
+  // FIXME: Most 64-bit opcodes equal the 32-bit opcode of the same instruction
+  // plus one, but there are exceptions: PPC::ANDC_rec, PPC::ANDI_rec and
+  // PPC::ANDIS_rec.
+  unsigned NewOpcode = Opcode + 1;
+  if (Opcode == PPC::ANDC_rec)
+    NewOpcode = PPC::ANDC8_rec;
+  else if (Opcode == PPC::ANDI_rec)
+    NewOpcode = PPC::ANDI8_rec;
+  else if (Opcode == PPC::ANDIS_rec)
+    NewOpcode = PPC::ANDIS8_rec;
+
+  const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+  const MCInstrDesc &MCID = TII->get(NewOpcode);
+
+  Register DstReg = MI->getOperand(0).getReg();
+  const TargetRegisterClass *NewRC =
+      TRI->getRegClass(MCID.operands()[0].RegClass);
+  // Nothing to do if the destination already has NewOpcode's class.
+  if (NewRC == MRI->getRegClass(DstReg))
+    return;
+
+  DebugLoc DL = MI->getDebugLoc();
+  MachineBasicBlock *MBB = MI->getParent();
+
+  // A virtual register operand in GPRC or GPRC_and_GPRC_NOR0 whose class
+  // differs from the one NewOpcode expects is promoted with IMPLICIT_DEF +
+  // INSERT_SUBREG. PromoteRegs maps operand index to the promoted register;
+  // ReCalRegs holds the registers whose liveness is recomputed afterwards.
+  DenseMap<unsigned, Register> PromoteRegs;
+  DenseMap<unsigned, Register> ReCalRegs;
+  for (unsigned I = 1, E = MI->getNumOperands(); I < E; ++I) {
+    const MachineOperand &Operand = MI->getOperand(I);
+    if (!Operand.isReg() || !Operand.getReg().isVirtual())
+      continue;
+
+    Register OperandReg = Operand.getReg();
+    const TargetRegisterClass *RC =
+        TRI->getRegClass(MCID.operands()[I].RegClass);
+    const TargetRegisterClass *OrgRC = MRI->getRegClass(OperandReg);
+    if (RC != OrgRC && (OrgRC == &PPC::GPRCRegClass ||
+                        OrgRC == &PPC::GPRC_and_GPRC_NOR0RegClass)) {
+      Register TmpReg = MRI->createVirtualRegister(RC);
+      Register DstTmpReg = MRI->createVirtualRegister(RC);
+      BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
+      BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
+          .addReg(TmpReg)
+          .addReg(OperandReg)
+          .addImm(PPC::sub_32);
+      PromoteRegs[I] = DstTmpReg;
+      ReCalRegs[I] = DstTmpReg;
+    } else {
+      ReCalRegs[I] = OperandReg;
+    }
+  }
+
+  Register NewReg = MRI->createVirtualRegister(NewRC);
+
+  // Build the replacement before MI, swapping in any promoted operands.
+  MachineInstrBuilder NewMI =
+      BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewReg);
+  for (unsigned I = 1, E = MI->getNumOperands(); I < E; ++I) {
+    auto Promoted = PromoteRegs.find(I);
+    if (Promoted != PromoteRegs.end())
+      NewMI.addReg(Promoted->second, RegState::Kill);
+    else
+      NewMI.add(MI->getOperand(I));
+  }
+
+  // Liveness of the operand registers is recomputed while MI is still present.
+  for (const auto &Entry : ReCalRegs)
+    LV->recomputeForSingleDefVirtReg(Entry.second);
+
+  // Erase MI; its destination becomes a sub_32 COPY of NewReg.
+  MachineBasicBlock::instr_iterator InsertPt(NewMI.getInstr());
+  MI->eraseFromParent();
+  BuildMI(*MBB, ++InsertPt, DL, TII->get(PPC::COPY), DstReg)
+      .addReg(NewReg, RegState::Kill, PPC::sub_32);
+  LV->recomputeForSingleDefVirtReg(NewReg);
+}
+
 // The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
 // does not count all of the recursions. The parameter BinOpDepth is incremented
 // only when isSignOrZeroExtended calls itself more than once. This is done to
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 045932dc0d3ba1..f6e79707913c7b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -17,6 +17,7 @@
 #include "PPC.h"
 #include "PPCRegisterInfo.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 
 #define GET_INSTRINFO_HEADER
@@ -610,6 +611,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                       const MachineRegisterInfo *MRI) const {
     return isSignOrZeroExtended(Reg, 0, MRI).second;
   }
+  void replaceInstrAfterElimExt32To64(const Register &Reg,
+                                      MachineRegisterInfo *MRI,
+                                      unsigned BinOpDepth,
+                                      LiveVariables *LV) const;
 
   bool convertToImmediateForm(MachineInstr &MI,
                               SmallSet<Register, 4> &RegsToUpdate,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 82da1a3c305983..7c94add841402a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2408,7 +2408,7 @@ defm SRW  : XForm_6r<31, 536, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
                      [(set i32:$RA, (PPCsrl i32:$RST, i32:$RB))]>, ZExt32To64;
 defm SRAW : XForm_6rc<31, 792, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
                       "sraw", "$RA, $RST, $RB", IIC_IntShift,
-                      [(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>, SExt32To64;
+                      [(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>;
 }
 
 def : InstAlias<"mr $rA, $rB", (OR gprc:$rA, gprc:$rB, gprc:$rB)>;
@@ -2423,8 +2423,7 @@ let PPC970_Unit = 1 in {  // FXU Operations.
 let hasSideEffects = 0 in {
 defm SRAWI : XForm_10rc<31, 824, (outs gprc:$RA), (ins gprc:$RST, u5imm:$RB),
                         "srawi", "$RA, $RST, $RB", IIC_IntShift,
-                        [(set i32:$RA, (sra i32:$RST, (i32 imm:$RB)))]>,
-                        SExt32To64;
+                        [(set i32:$RA, (sra i32:$RST, (i32 imm:$RB)))]>;
 defm CNTLZW : XForm_11r<31,  26, (outs gprc:$RA), (ins gprc:$RST),
                         "cntlzw", "$RA, $RST", IIC_IntGeneral,
                         [(set i32:$RA, (ctlz i32:$RST))]>, ZExt32To64;
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 494e4b52a5b5eb..76b9c19db2b3eb 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1037,6 +1037,7 @@ bool PPCMIPeephole::simplifyCode() {
                    TII->isSignExtended(NarrowReg, MRI)) {
           // We can eliminate EXTSW if the input is known to be already
           // sign-extended.
+          TII->replaceInstrAfterElimExt32To64(NarrowReg, MRI, 0, LV);
           LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
           Register TmpReg =
               MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
index dfbf412a939212..03d38aeb20e2e8 100644
--- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
+++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
@@ -256,7 +256,7 @@ body:             |
     %3 = IMPLICIT_DEF
     %2 = LI 170
     %4 = RLWNM killed %1, %2, 20, 27
-    ; CHECK: RLWINM killed %1, 10, 20, 27
+    ; CHECK: RLWINM8 killed %6, 10, 20, 27
     ; CHECK-LATE: rlwinm 3, 3, 10, 20, 27
     $x3 = EXTSW_32_64 %4
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -604,7 +604,7 @@ body:             |
     %2 = LI 48
     %5 = COPY %0.sub_32
     %8 = SRW killed %5, killed %2
-    ; CHECK: LI 0
+    ; CHECK: LI8 0
     ; CHECK-LATE: li 3, 0
     $x3 = EXTSW_32_64 %8
     BLR8 implicit $lr8, implicit $rm, implicit $x3
diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
index 761316ed7726d7..a1e4cd38d56efe 100644
--- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
+++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
@@ -1348,7 +1348,7 @@ body:             |
     %1 = LI 77
     %2 = ADDI killed %1, 44
     %3 = EXTSW_32_64 killed %2
-    ; CHECK: LI 121
+    ; CHECK: LI8 121
     ; CHECK-LATE: li 3, 121
     $x3 = COPY %3
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -3573,7 +3573,7 @@ body:             |
 
     %0 = LI 777
     %1 = ORI %0, 88
-    ; CHECK: LI 857
+    ; CHECK: LI8 857
     ; CHECK-LATE: li 3, 857
     $x3 = EXTSW_32_64 %1
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -4145,7 +4145,7 @@ body:             |
     %3 = IMPLICIT_DEF
     %2 = LI 17
     %4 = RLWINM killed %2, 4, 20, 27
-    ; CHECK: LI 272
+    ; CHECK: LI8 272
     ; CHECK-LATE: li 3, 272
     $x3 = EXTSW_32_64 %4
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -4873,7 +4873,7 @@ body:             |
     %2 = LI 8
     %5 = COPY %0.sub_32
     %8 = SRW killed %5, killed %2
-    ; CHECK: RLWINM killed %5, 24, 8, 31
+    ; CHECK: RLWINM8 killed %10, 24, 8, 31
     ; CHECK-LATE: srwi 3, 3, 8
     $x3 = EXTSW_32_64 %8
     BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -6456,7 +6456,7 @@ body:             |
 
     %0 = LI 871
     %1 = XORI %0, 17
-    ; CHECK: LI 886
+    ; CHECK: LI8 886
     ; CHECK-LATE: li 3, 886
     $x3 = EXTSW_32_64 %1
     BLR8 implicit $lr8, implicit $rm, implicit $x3
diff --git a/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir b/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir
new file mode 100644
index 00000000000000..1b54ba7a38b816
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir
@@ -0,0 +1,698 @@
+# RUN: llc -run-pass=ppc-mi-peepholes  -mtriple powerpc64-ibm-aix-xcoff %s -o - \
+# RUN:   -verify-machineinstrs | FileCheck %s
+
+--- |
+  ; ModuleID = '71030_tmp_reduce-O2.ll'
+  source_filename = "71030_tmp_reduce.c"
+  target datalayout = "E-m:a-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+  target triple = "powerpc64-ibm-aix-xcoff"
+  
+  @globalShortValue = local_unnamed_addr global i16 1, align 2
+  @globalCharValue = local_unnamed_addr global i8 0, align 1
+  @largeNumber = local_unnamed_addr global i64 -3664682556119382352, align 8
+  @someIntValue = local_unnamed_addr global i32 378441747, align 4
+  @unitIncrement = local_unnamed_addr global i32 1, align 4
+  @computedResultUll = local_unnamed_addr global i64 0, align 8
+  @computedResultShort = local_unnamed_addr global i16 0, align 2
+  @computedResultUChar = local_unnamed_addr global i8 0, align 1
+  @computedResultBool = local_unnamed_addr global i8 0, align 1
+  @computedResultChar = local_unnamed_addr global i8 0, align 1
+  @shortArray = local_unnamed_addr global [8 x i16] zeroinitializer, align 2
+  @charArray = local_unnamed_addr global [8 x [8 x [8 x i8]]] zeroinitializer, align 1
+  @longArray = local_unnamed_addr global [8 x [8 x i64]] zeroinitializer, align 8
+  @resultArray = local_unnamed_addr global [8 x [8 x i16]] zeroinitializer, align 2
+  @ullArray = local_unnamed_addr global [8 x i64] zeroinitializer, align 8
+  @intArray = local_unnamed_addr global [8 x [8 x [8 x i32]]] zeroinitializer, align 4
+  @_MergedGlobals = private constant <{ [29 x i8], [46 x i8] }> <{ [29 x i8] c"Computed Result (ULL): %llx\0A\00", [46 x i8] c"Computed convert largeNumber&&&& (ULL): %llx\0A\00" }>, align 1
+  
+  @.str.1 = private alias [29 x i8], ptr @_MergedGlobals
+  @.str = private alias [46 x i8], getelementptr inbounds (<{ [29 x i8], [46 x i8] }>, ptr @_MergedGlobals, i32 0, i32 1)
+  
+  ; Function Attrs: nofree nounwind
+  define noundef signext i32 @main() local_unnamed_addr #0 {
+  entry:
+    store i16 -1, ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 3), align 2, !tbaa !3
+    %0 = load i64, ptr @largeNumber, align 8, !tbaa !7
+    %conv = trunc i64 %0 to i32
+    %sext = shl i32 %conv, 16
+    %conv1 = ashr exact i32 %sext, 16
+    %sub = add nsw i32 %conv1, -1705
+    %call = tail call signext i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) getelementptr inbounds (<{ [29 x i8], [46 x i8] }>, ptr @_MergedGlobals, i32 0, i32 1), i32 noundef signext %sub)
+    %1 = load i16, ptr @globalShortValue, align 2, !tbaa !3
+    %2 = load i32, ptr @someIntValue, align 4, !tbaa !9
+    %3 = trunc i32 %2 to i8
+    %conv20 = add i8 %3, -19
+    %4 = load i32, ptr @unitIncrement, align 4
+    %5 = load i8, ptr @globalCharValue, align 1
+    %conv45 = sext i8 %5 to i32
+    %computedResultShort.promoted = load i16, ptr @computedResultShort, align 2, !tbaa !3
+    %resultArray.promoted = load i16, ptr @resultArray, align 2, !tbaa !3
+    %computedResultChar.promoted149 = load i8, ptr @computedResultChar, align 1, !tbaa !11
+    %6 = sext i8 %conv20 to i64
+    %7 = load i16, ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 3), align 2, !tbaa !3
+    %8 = load i16, ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 2), align 2
+    %conv46 = sext i16 %8 to i32
+    %cond54 = tail call i32 @llvm.smin.i32(i32 %conv45, i32 %conv46)
+    %tobool = icmp ne i32 %cond54, 0
+    %conv55 = zext i1 %tobool to i8
+    %9 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @ullArray, i64 0, i64 3), align 8
+    %tobool72 = icmp ne i64 %9, 0
+    %frombool = zext i1 %tobool72 to i8
+    %smax = tail call i64 @llvm.smax.i64(i64 %6, i64 4)
+    %10 = add nuw nsw i64 %smax, 3
+    %11 = sub i64 %10, %6
+    %12 = lshr i64 %11, 2
+    %13 = add nuw nsw i64 %12, 1
+    %n.vec = and i64 %13, 9223372036854775806
+    %14 = shl i64 %n.vec, 2
+    %ind.end = add i64 %14, %6
+    %15 = shl i64 %6, 2
+    %16 = shl i64 %6, 3
+    %17 = add nsw i64 %16, -64
+    %scevgep30 = getelementptr i8, ptr @longArray, i64 %17
+    %18 = add nsw i64 %15, 64
+    %scevgep31 = getelementptr i8, ptr @intArray, i64 %18
+    %19 = lshr i64 %13, 1
+    %20 = shl nuw nsw i64 %19, 1
+    %21 = add nsw i64 %20, -2
+    %22 = lshr i64 %21, 1
+    %23 = add nuw i64 %22, 1
+    br label %for.body16
+  
+  for.cond.cleanup15:                               ; preds = %for.cond.cleanup25
+    %24 = tail call i16 @llvm.smin.i16(i16 %1, i16 %7)
+    %conv11.le = sext i16 %24 to i64
+    store i64 %conv11.le, ptr @computedResultUll, align 8, !tbaa !7
+    %call97 = tail call signext i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @_MergedGlobals, i64 noundef %conv11.le)
+    ret i32 0
+  
+  for.body16:                                       ; preds = %for.cond.cleanup25, %entry
+    %lsr.iv29 = phi i32 [ %lsr.iv.next, %for.cond.cleanup25 ], [ 8, %entry ]
+    %conv36.lcssa132140 = phi i16 [ %computedResultShort.promoted, %entry ], [ %conv36.lcssa131, %for.cond.cleanup25 ]
+    %and.lcssa135139 = phi i16 [ %resultArray.promoted, %entry ], [ %and.lcssa134, %for.cond.cleanup25 ]
+    %conv81118.lcssa.lcssa137138 = phi i8 [ %computedResultChar.promoted149, %entry ], [ %conv81118.lcssa.lcssa136, %for.cond.cleanup25 ]
+    %25 = icmp slt i8 %conv20, 8
+    br i1 %25, label %for.body31.lr.ph, label %for.cond.cleanup25
+  
+  for.body31.lr.ph:                                 ; preds = %for.body16
+    %26 = icmp ult i64 %11, 4
+    store i8 %conv55, ptr @computedResultUChar, align 1, !tbaa !11
+    br i1 %26, label %for.body31.preheader, label %vector.body.preheader
+  
+  vector.body.preheader:                            ; preds = %for.body31.lr.ph
+    call void @llvm.set.loop.iterations.i64(i64 %23)
+    br label %vector.body
+  
+  vector.body:                                      ; preds = %vector.body.preheader, %vector.body
+    %vec.phi = phi i16 [ %44, %vector.body ], [ %conv36.lcssa132140, %vector.body.preheader ]
+    %vec.phi159 = phi i16 [ %45, %vector.body ], [ 0, %vector.body.preheader ]
+    %vec.phi160 = phi i16 [ %46, %vector.body ], [ %and.lcssa135139, %vector.body.preheader ]
+    %vec.phi161 = phi i16 [ %47, %vector.body ], [ -1, %vector.body.preheader ]
+    %vec.phi162 = phi i8 [ %48, %vector.body ], [ %conv81118.lcssa.lcssa137138, %vector.body.preheader ]
+    %vec.phi163 = phi i8 [ %49, %vector.body ], [ 0, %vector.body.preheader ]
+    %27 = phi ptr [ %scevgep30, %vector.body.preheader ], [ %31, %vector.body ]
+    %28 = phi ptr [ %scevgep31, %vector.body.preheader ], [ %29, %vector.body ]
+    %29 = getelementptr i8, ptr %28, i64 32
+    %30 = getelementptr i8, ptr %29, i64 16
+    %31 = getelementptr i8, ptr %27, i64 64
+    %32 = getelementptr i8, ptr %31, i64 32
+    %33 = trunc i32 %4 to i16
+    %34 = load i64, ptr %31, align 8, !tbaa !7
+    %35 = load i64, ptr %32, align 8, !tbaa !7
+    %36 = trunc i64 %34 to i16
+    %37 = trunc i64 %35 to i16
+    %38 = load i32, ptr %29, align 4, !tbaa !9
+    %39 = load i32, ptr %30, align 4, !tbaa !9
+    %40 = trunc i32 %38 to i8
+    %41 = trunc i32 %39 to i8
+    %42 = mul i8 %40, -6
+    %43 = mul i8 %41, -6
+    %44 = sub i16 %vec.phi, %33
+    %45 = sub i16 %vec.phi159, %33
+    %46 = and i16 %vec.phi160, %36
+    %47 = and i16 %vec.phi161, %37
+    %48 = add i8 %42, %vec.phi162
+    %49 = add i8 %43, %vec.phi163
+    %50 = call i1 @llvm.loop.decrement.i64(i64 1)
+    br i1 %50, label %vector.body, label %middle.block, !llvm.loop !12
+  
+  middle.block:                                     ; preds = %vector.body
+    %51 = icmp eq i64 %13, %n.vec
+    %bin.rdx = add i16 %45, %44
+    %bin.rdx164 = and i16 %47, %46
+    %bin.rdx165 = add i8 %49, %48
+    br i1 %51, label %for.cond21.for.cond.cleanup25_crit_edge, label %for.body31.preheader
+  
+  for.body31.preheader:                             ; preds = %middle.block, %for.body31.lr.ph
+    %indvars.iv.ph = phi i64 [ %6, %for.body31.lr.ph ], [ %ind.end, %middle.block ]
+    %conv36121128.ph = phi i16 [ %conv36.lcssa132140, %for.body31.lr.ph ], [ %bin.rdx, %middle.block ]
+    %and122127.ph = phi i16 [ %and.lcssa135139, %for.body31.lr.ph ], [ %bin.rdx164, %middle.block ]
+    %conv81118.lcssa124126.ph = phi i8 [ %conv81118.lcssa.lcssa137138, %for.body31.lr.ph ], [ %bin.rdx165, %middle.block ]
+    %52 = shl i64 %indvars.iv.ph, 2
+    %53 = shl i64 %indvars.iv.ph, 3
+    %scevgep = getelementptr i8, ptr getelementptr ([8 x [8 x i64]], ptr @longArray, i64 -1, i64 7, i64 4), i64 %53
+    %scevgep32 = getelementptr i8, ptr getelementptr inbounds ([8 x [8 x [8 x i32]]], ptr @intArray, i64 0, i64 0, i64 2, i64 4), i64 %52
+    %smax33 = call i64 @llvm.smax.i64(i64 %indvars.iv.ph, i64 4)
+    %54 = add i64 %smax33, 3
+    %55 = sub i64 %54, %indvars.iv.ph
+    %56 = lshr i64 %55, 2
+    %57 = add nuw nsw i64 %56, 1
+    call void @llvm.set.loop.iterations.i64(i64 %57)
+    br label %for.body31
+  
+  for.cond21.for.cond.cleanup25_crit_edge:          ; preds = %for.body31, %middle.block
+    %conv36.lcssa = phi i16 [ %bin.rdx, %middle.block ], [ %conv36, %for.body31 ]
+    %and.lcssa = phi i16 [ %bin.rdx164, %middle.block ], [ %and, %for.body31 ]
+    %.lcssa = phi i8 [ %bin.rdx165, %middle.block ], [ %67, %for.body31 ]
+    %58 = trunc i16 %1 to i8
+    store i16 %conv36.lcssa, ptr @computedResultShort, align 2, !tbaa !3
+    store i8 %58, ptr getelementptr inbounds ([8 x [8 x [8 x i8]]], ptr @charArray, i64 0, i64 2, i64 0, i64 3), align 1, !tbaa !11
+    store i16 %and.lcssa, ptr @resultArray, align 2, !tbaa !3
+    store i8 %frombool, ptr @computedResultBool, align 1, !tbaa !16
+    store i8 %.lcssa, ptr @computedResultChar, align 1, !tbaa !11
+    br label %for.cond.cleanup25
+  
+  for.cond.cleanup25:                               ; preds = %for.cond21.for.cond.cleanup25_crit_edge, %for.body16
+    %conv81118.lcssa.lcssa136 = phi i8 [ %.lcssa, %for.cond21.for.cond.cleanup25_crit_edge ], [ %conv81118.lcssa.lcssa137138, %for.body16 ]
+    %and.lcssa134 = phi i16 [ %and.lcssa, %for.cond21.for.cond.cleanup25_crit_edge ], [ %and.lcssa135139, %for.body16 ]
+    %conv36.lcssa131 = phi i16 [ %conv36.lcssa, %for.cond21.for.cond.cleanup25_crit_edge ], [ %conv36.lcssa132140, %for.body16 ]
+    %lsr.iv.next = add nsw i32 %lsr.iv29, -1
+    %exitcond.not = icmp eq i32 %lsr.iv.next, 0
+    br i1 %exitcond.not, label %for.cond.cleanup15, label %for.body16, !llvm.loop !18
+  
+  for.body31:                                       ; preds = %for.body31, %for.body31.preheader
+    %conv36121128 = phi i16 [ %conv36, %for.body31 ], [ %conv36121128.ph, %for.body31.preheader ]
+    %and122127 = phi i16 [ %and, %for.body31 ], [ %and122127.ph, %for.body31.preheader ]
+    %conv81118.lcssa124126 = phi i8 [ %67, %for.body31 ], [ %conv81118.lcssa124126.ph, %for.body31.preheader ]
+    %59 = phi ptr [ %scevgep, %for.body31.preheader ], [ %62, %for.body31 ]
+    %60 = phi ptr [ %scevgep32, %for.body31.preheader ], [ %61, %for.body31 ]
+    %61 = getelementptr i8, ptr %60, i64 16
+    %62 = getelementptr i8, ptr %59, i64 32
+    %63 = trunc i32 %4 to i16
+    %64 = load i64, ptr %62, align 8, !tbaa !7
+    %conv61 = trunc i64 %64 to i16
+    %65 = load i32, ptr %61, align 4, !tbaa !9
+    %66 = trunc i32 %65 to i8
+    %.neg = mul i8 %66, -6
+    %conv36 = sub i16 %conv36121128, %63
+    %and = and i16 %and122127, %conv61
+    %67 = add i8 %.neg, %conv81118.lcssa124126
+    %68 = call i1 @llvm.loop.decrement.i64(i64 1)
+    br i1 %68, label %for.body31, label %for.cond21.for.cond.cleanup25_crit_edge, !llvm.loop !19
+  }
+  
+  ; Function Attrs: nofree nounwind
+  declare noundef signext i32 @printf(ptr nocapture noundef readonly, ...) local_unnamed_addr #0
+  
+  ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+  declare i32 @llvm.smin.i32(i32, i32) #1
+  
+  ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+  declare i16 @llvm.smin.i16(i16, i16) #1
+  
+  ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+  declare i64 @llvm.smax.i64(i64, i64) #1
+  
+  ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn
+  declare void @llvm.set.loop.iterations.i64(i64) #2
+  
+  ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn
+  declare i1 @llvm.loop.decrement.i64(i64) #2
+  
+  attributes #0 = { nofree nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr7" "target-features"="+altivec,+bpermd,+extdiv,+isa-v206-instructions,+vsx,-aix-small-local-exec-tls,-crbits,-crypto,-direct-move,-htm,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe" }
+  attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+  attributes #2 = { nocallback noduplicate nofree nosync nounwind willreturn }
+  
+  !llvm.module.flags = !{!0, !1}
+  !llvm.ident = !{!2}
+  
+  !0 = !{i32 1, !"wchar_size", i32 4}
+  !1 = !{i32 8, !"PIC Level", i32 2}
+  !2 = !{!"IBM Open XL C/C++ for AIX 17.1.3 (5725-C72, 5765-J18), version 17.1.3.0, clang version 19.0.0git"}
+  !3 = !{!4, !4, i64 0}
+  !4 = !{!"short", !5, i64 0}
+  !5 = !{!"omnipotent char", !6, i64 0}
+  !6 = !{!"Simple C/C++ TBAA"}
+  !7 = !{!8, !8, i64 0}
+  !8 = !{!"long long", !5, i64 0}
+  !9 = !{!10, !10, i64 0}
+  !10 = !{!"int", !5, i64 0}
+  !11 = !{!5, !5, i64 0}
+  !12 = distinct !{!12, !13, !14, !15}
+  !13 = !{!"llvm.loop.mustprogress"}
+  !14 = !{!"llvm.loop.isvectorized", i32 1}
+  !15 = !{!"llvm.loop.unroll.runtime.disable"}
+  !16 = !{!17, !17, i64 0}
+  !17 = !{!"_Bool", !5, i64 0}
+  !18 = distinct !{!18, !13}
+  !19 = distinct !{!19, !13, !14}
+
+...
+---
+name:            main
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: gprc, preferred-register: '' }
+  - { id: 1, class: gprc, preferred-register: '' }
+  - { id: 2, class: gprc, preferred-register: '' }
+  - { id: 3, class: gprc, preferred-register: '' }
+  - { id: 4, class: gprc, preferred-register: '' }
+  - { id: 5, class: gprc, preferred-register: '' }
+  - { id: 6, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 7, class: gprc, preferred-register: '' }
+  - { id: 8, class: gprc, preferred-register: '' }
+  - { id: 9, class: gprc, preferred-register: '' }
+  - { id: 10, class: g8rc, preferred-register: '' }
+  - { id: 11, class: g8rc, preferred-register: '' }
+  - { id: 12, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 13, class: g8rc, preferred-register: '' }
+  - { id: 14, class: g8rc, preferred-register: '' }
+  - { id: 15, class: g8rc, preferred-register: '' }
+  - { id: 16, class: g8rc, preferred-register: '' }
+  - { id: 17, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 18, class: gprc, preferred-register: '' }
+  - { id: 19, class: gprc, preferred-register: '' }
+  - { id: 20, class: gprc, preferred-register: '' }
+  - { id: 21, class: gprc, preferred-register: '' }
+  - { id: 22, class: gprc, preferred-register: '' }
+  - { id: 23, class: gprc, preferred-register: '' }
+  - { id: 24, class: gprc, preferred-register: '' }
+  - { id: 25, class: gprc, preferred-register: '' }
+  - { id: 26, class: gprc, preferred-register: '' }
+  - { id: 27, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 28, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 29, class: g8rc, preferred-register: '' }
+  - { id: 30, class: g8rc, preferred-register: '' }
+  - { id: 31, class: gprc, preferred-register: '' }
+  - { id: 32, class: gprc, preferred-register: '' }
+  - { id: 33, class: gprc, preferred-register: '' }
+  - { id: 34, class: gprc, preferred-register: '' }
+  - { id: 35, class: gprc, preferred-register: '' }
+  - { id: 36, class: gprc, preferred-register: '' }
+  - { id: 37, class: gprc, preferred-register: '' }
+  - { id: 38, class: gprc, preferred-register: '' }
+  - { id: 39, class: gprc, preferred-register: '' }
+  - { id: 40, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 41, class: gprc, preferred-register: '' }
+  - { id: 42, class: gprc, preferred-register: '' }
+  - { id: 43, class: gprc, preferred-register: '' }
+  - { id: 44, class: g8rc, preferred-register: '' }
+  - { id: 45, class: g8rc, preferred-register: '' }
+  - { id: 46, class: gprc, preferred-register: '' }
+  - { id: 47, class: gprc, preferred-register: '' }
+  - { id: 48, class: gprc, preferred-register: '' }
+  - { id: 49, class: gprc, preferred-register: '' }
+  - { id: 50, class: gprc, preferred-register: '' }
+  - { id: 51, class: gprc, preferred-register: '' }
+  - { id: 52, class: gprc, preferred-register: '' }
+  - { id: 53, class: gprc, preferred-register: '' }
+  - { id: 54, class: gprc, preferred-register: '' }
+  - { id: 55, class: gprc, preferred-register: '' }
+  - { id: 56, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 57, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 58, class: g8rc, preferred-register: '' }
+  - { id: 59, class: g8rc, preferred-register: '' }
+  - { id: 60, class: gprc, preferred-register: '' }
+  - { id: 61, class: gprc, preferred-register: '' }
+  - { id: 62, class: gprc, preferred-register: '' }
+  - { id: 63, class: gprc, preferred-register: '' }
+  - { id: 64, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 65, class: g8rc, preferred-register: '' }
+  - { id: 66, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 67, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 68, class: gprc, preferred-register: '' }
+  - { id: 69, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 70, class: g8rc, preferred-register: '' }
+  - { id: 71, class: g8rc, preferred-register: '' }
+  - { id: 72, class: g8rc, preferred-register: '' }
+  - { id: 73, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 74, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 75, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 76, class: gprc, preferred-register: '' }
+  - { id: 77, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 78, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 79, class: gprc, preferred-register: '' }
+  - { id: 80, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 81, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 82, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 83, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 84, class: g8rc, preferred-register: '' }
+  - { id: 85, class: g8rc, preferred-register: '' }
+  - { id: 86, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 87, class: crrc, preferred-register: '' }
+  - { id: 88, class: gprc, preferred-register: '' }
+  - { id: 89, class: crrc, preferred-register: '' }
+  - { id: 90, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 91, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 92, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 93, class: g8rc, preferred-register: '' }
+  - { id: 94, class: crrc, preferred-register: '' }
+  - { id: 95, class: gprc, preferred-register: '' }
+  - { id: 96, class: gprc, preferred-register: '' }
+  - { id: 97, class: crrc, preferred-register: '' }
+  - { id: 98, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 99, class: g8rc, preferred-register: '' }
+  - { id: 100, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 101, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 102, class: g8rc, preferred-register: '' }
+  - { id: 103, class: g8rc, preferred-register: '' }
+  - { id: 104, class: g8rc, preferred-register: '' }
+  - { id: 105, class: g8rc, preferred-register: '' }
+  - { id: 106, class: g8rc, preferred-register: '' }
+  - { id: 107, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 108, class: g8rc, preferred-register: '' }
+  - { id: 109, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 110, class: g8rc, preferred-register: '' }
+  - { id: 111, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 112, class: crrc, preferred-register: '' }
+  - { id: 113, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 114, class: crrc, preferred-register: '' }
+  - { id: 115, class: gprc, preferred-register: '' }
+  - { id: 116, class: gprc, preferred-register: '' }
+  - { id: 117, class: gprc, preferred-register: '' }
+  - { id: 118, class: gprc, preferred-register: '' }
+  - { id: 119, class: gprc, preferred-register: '' }
+  - { id: 120, class: gprc, preferred-register: '' }
+  - { id: 121, class: gprc, preferred-register: '' }
+  - { id: 122, class: gprc, preferred-register: '' }
+  - { id: 123, class: gprc, preferred-register: '' }
+  - { id: 124, class: gprc, preferred-register: '' }
+  - { id: 125, class: crbitrc, preferred-register: '' }
+  - { id: 126, class: crrc, preferred-register: '' }
+  - { id: 127, class: g8rc, preferred-register: '' }
+  - { id: 128, class: g8rc, preferred-register: '' }
+  - { id: 129, class: g8rc, preferred-register: '' }
+  - { id: 130, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 131, class: g8rc, preferred-register: '' }
+  - { id: 132, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 133, class: crrc, preferred-register: '' }
+  - { id: 134, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 135, class: g8rc, preferred-register: '' }
+  - { id: 136, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 137, class: g8rc, preferred-register: '' }
+  - { id: 138, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 139, class: g8rc, preferred-register: '' }
+  - { id: 140, class: gprc, preferred-register: '' }
+  - { id: 141, class: gprc, preferred-register: '' }
+  - { id: 142, class: gprc, preferred-register: '' }
+  - { id: 143, class: crbitrc, preferred-register: '' }
+  - { id: 144, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 145, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 146, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 147, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 148, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 149, class: crrc, preferred-register: '' }
+  - { id: 150, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 151, class: gprc_and_gprc_nor0, preferred-register: '' }
+  - { id: 152, class: crrc, preferred-register: '' }
+  - { id: 153, class: gprc, preferred-register: '' }
+  - { id: 154, class: g8rc, preferred-register: '' }
+  - { id: 155, class: g8rc_and_g8rc_nox0, preferred-register: '' }
+  - { id: 156, class: g8rc, preferred-register: '' }
+  - { id: 157, class: g8rc, preferred-register: '' }
+  - { id: 158, class: g8rc, preferred-register: '' }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    8
+  adjustsStack:    false
+  hasCalls:        true
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    successors: %bb.2(0x80000000)
+  
+    %64:g8rc_and_g8rc_nox0 = LDtoc @shortArray, $x2 :: (load (s64) from got)
+    %65:g8rc = LI8 -1
+    STH8 killed %65, 6, %64 :: (store (s16) into `ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 3)`, !tbaa !3)
+    %66:g8rc_and_g8rc_nox0 = LDtoc @largeNumber, $x2 :: (load (s64) from got)
+    %67:gprc_and_gprc_nor0 = LHA 6, killed %66 :: (dereferenceable load (s16) from @largeNumber + 6, basealign 8, !tbaa !7)
+    %68:gprc = ADDI killed %67, -1705
+    ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+    %69:g8rc_and_g8rc_nox0 = LDtoc @_MergedGlobals, $x2 :: (load (s64) from got)
+    %70:g8rc = nuw ADDI8 killed %69, 29
+    %71:g8rc = EXTSW_32_64 killed %68
+    $x3 = COPY %70
+    $x4 = COPY %71
+    BL8_NOP <mcsymbol .printf>, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x2, implicit-def $r1, implicit-def $x3
+    ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+    %73:g8rc_and_g8rc_nox0 = LDtoc @globalShortValue, $x2 :: (load (s64) from got)
+    %0:gprc = LHZ 0, killed %73 :: (dereferenceable load (s16) from @globalShortValue, !tbaa !3)
+    %74:g8rc_and_g8rc_nox0 = LDtoc @someIntValue, $x2 :: (load (s64) from got)
+    %75:gprc_and_gprc_nor0 = LBZ 3, killed %74 :: (dereferenceable load (s8) from @someIntValue + 3, basealign 4, !tbaa !9)
+    %76:gprc = ADDI killed %75, -19
+    %1:gprc = EXTSB %76
+    %77:g8rc_and_g8rc_nox0 = LDtoc @unitIncrement, $x2 :: (load (s64) from got)
+    %2:gprc = LWZ 0, killed %77 :: (dereferenceable load (s32) from @unitIncrement)
+    %78:g8rc_and_g8rc_nox0 = LDtoc @globalCharValue, $x2 :: (load (s64) from got)
+    %79:gprc = LBZ 0, killed %78 :: (dereferenceable load (s8) from @globalCharValue)
+    %80:gprc_and_gprc_nor0 = EXTSB killed %79
+    %81:g8rc_and_g8rc_nox0 = LDtoc @computedResultShort, $x2 :: (load (s64) from got)
+    %3:gprc = LHZ 0, %81 :: (dereferenceable load (s16) from @computedResultShort, !tbaa !3)
+    %82:g8rc_and_g8rc_nox0 = LDtoc @resultArray, $x2 :: (load (s64) from got)
+    %4:gprc = LHZ 0, %82 :: (dereferenceable load (s16) from @resultArray, !tbaa !3)
+    %83:g8rc_and_g8rc_nox0 = LDtoc @computedResultChar, $x2 :: (load (s64) from got)
+    %5:gprc = LBZ 0, %83 :: (dereferenceable load (s8) from @computedResultChar, !tbaa !11)
+    %85:g8rc = IMPLICIT_DEF
+    %84:g8rc = INSERT_SUBREG %85, %76, %subreg.sub_32
+    %6:g8rc_and_g8rc_nox0 = EXTSB8 killed %84
+    %7:gprc = LHZ 6, %64 :: (dereferenceable load (s16) from `ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 3)`, !tbaa !3)
+    %86:gprc_and_gprc_nor0 = LHA 4, %64 :: (dereferenceable load (s16) from `ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 2)`)
+    ; CHECK:       %162:g8rc = LHA8 6, %64
+    ; CHECK-NEXT:  %150:gprc_and_gprc_nor0 = COPY killed %162.sub_32
+    %87:crrc = CMPW %80, %86
+    %88:gprc = ISEL %80, %86, %87.sub_lt
+    %89:crrc = CMPLWI killed %88, 0
+    %91:gprc_and_gprc_nor0 = LI 1
+    %8:gprc = ISEL $zero, %91, %89.sub_eq
+    %92:g8rc_and_g8rc_nox0 = LDtoc @ullArray, $x2 :: (load (s64) from got)
+    %93:g8rc = LD 24, killed %92 :: (dereferenceable load (s64) from `ptr getelementptr inbounds ([8 x i64], ptr @ullArray, i64 0, i64 3)`)
+    %94:crrc = CMPLDI killed %93, 0
+    $cr7 = COPY %94
+    %95:gprc = MFOCRF $cr7
+    %96:gprc = RLWINM killed %95, 31, 31, 31
+    %9:gprc = XORI killed %96, 1
+    %97:crrc = CMPDI %6, 4
+    %98:g8rc_and_g8rc_nox0 = LI8 4
+    %99:g8rc = ISEL8 %6, %98, %97.sub_gt
+    %100:g8rc_and_g8rc_nox0 = SUBF8 %6, killed %99
+    %10:g8rc = ADDI8 killed %100, 3
+    %101:g8rc_and_g8rc_nox0 = RLDICL %10, 62, 2
+    %11:g8rc = nuw nsw ADDI8 killed %101, 1
+    %102:g8rc = RLDICL %11, 63, 1
+    %12:g8rc_and_g8rc_nox0 = RLDICL killed %102, 1, 1
+    %103:g8rc = RLDICR %11, 2, 60
+    %13:g8rc = ADD8 killed %103, %6
+    %104:g8rc = RLDICR %6, 2, 61
+    %105:g8rc = RLDICR %6, 3, 60
+    %106:g8rc = LDtoc @longArray, $x2 :: (load (s64) from got)
+    %107:g8rc_and_g8rc_nox0 = ADD8 killed %105, %106
+    %14:g8rc = ADDI8 killed %107, -64
+    %108:g8rc = LDtoc @intArray, $x2 :: (load (s64) from got)
+    %109:g8rc_and_g8rc_nox0 = ADD8 killed %104, %108
+    %15:g8rc = ADDI8 killed %109, 64
+    %110:g8rc = nsw ADDI8 %12, -2
+    %111:g8rc_and_g8rc_nox0 = RLDICL %110, 63, 1
+    %16:g8rc = nuw ADDI8 killed %111, 1
+    %63:gprc = LI 8
+    %112:crrc = CMPWI %1, 7
+    %113:g8rc_and_g8rc_nox0 = LDtoc @computedResultUChar, $x2 :: (load (s64) from got)
+    %114:crrc = CMPLDI %10, 4
+    %118:gprc = LIS 0
+    %116:gprc = ORI %118, 65535
+    %126:crrc = CMPLD %11, %12
+    B %bb.2
+  
+  bb.1.for.cond.cleanup15:
+    %150:gprc_and_gprc_nor0 = EXTSH %7
+    %151:gprc_and_gprc_nor0 = EXTSH %0
+
+    ; CHECK:      %159:g8rc = IMPLICIT_DEF
+    ; CHECK-NEXT: %160:g8rc = INSERT_SUBREG %159, %0, %subreg.sub_32 
+    ; CHECK-NEXT: %161:g8rc = EXTSH8 killed %160
+    ; CHECK-NEXT: %151:gprc_and_gprc_nor0 = COPY killed %161.sub_32
+
+    %152:crrc = CMPW %151, %150
+    %153:gprc = ISEL %151, %150, %152.sub_lt
+    %154:g8rc = EXTSW_32_64 killed %153
+    ; CHECK-NOT: %154:g8rc = EXTSW_32_64 killed %153 
+    %155:g8rc_and_g8rc_nox0 = LDtoc @computedResultUll, $x2 :: (load (s64) from got)
+    STD %154, 0, killed %155 :: (store (s64) into @computedResultUll, !tbaa !7)
+    ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
+    %156:g8rc = LDtoc @_MergedGlobals, $x2 :: (load (s64) from got)
+    $x3 = COPY %156
+    $x4 = COPY %154
+    BL8_NOP <mcsymbol .printf>, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x4, implicit $x2, implicit-def $r1, implicit-def $x3
+    ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
+    %158:g8rc = LI8 0
+    $x3 = COPY %158
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+  
+  bb.2.for.body16:
+    successors: %bb.3(0x40000000), %bb.9(0x40000000)
+  
+    %17:gprc_and_gprc_nor0 = PHI %63, %bb.0, %52, %bb.9
+    %18:gprc = PHI %3, %bb.0, %51, %bb.9
+    %19:gprc = PHI %4, %bb.0, %50, %bb.9
+    %20:gprc = PHI %5, %bb.0, %49, %bb.9
+    BCC 44, %112, %bb.9
+    B %bb.3
+  
+  bb.3.for.body31.lr.ph:
+    successors: %bb.7(0x40000000), %bb.4(0x40000000)
+  
+    STB %8, 0, %113 :: (store (s8) into @computedResultUChar, !tbaa !11)
+    BCC 12, %114, %bb.7
+    B %bb.4
+  
+  bb.4.vector.body.preheader:
+    successors: %bb.5(0x80000000)
+  
+    MTCTR8loop %16, implicit-def dead $ctr8
+    %117:gprc = LI 0
+    %115:gprc = COPY %117
+  
+  bb.5.vector.body:
+    successors: %bb.5(0x7c000000), %bb.6(0x04000000)
+  
+    %21:gprc = PHI %18, %bb.4, %31, %bb.5
+    %22:gprc = PHI %115, %bb.4, %32, %bb.5
+    %23:gprc = PHI %19, %bb.4, %33, %bb.5
+    %24:gprc = PHI %116, %bb.4, %34, %bb.5
+    %25:gprc = PHI %20, %bb.4, %35, %bb.5
+    %26:gprc = PHI %117, %bb.4, %36, %bb.5
+    %27:g8rc_and_g8rc_nox0 = PHI %14, %bb.4, %30, %bb.5
+    %28:g8rc_and_g8rc_nox0 = PHI %15, %bb.4, %29, %bb.5
+    %29:g8rc = ADDI8 %28, 32
+    %30:g8rc = ADDI8 %27, 64
+    %119:gprc = LHZ 70, %27 :: (load (s16) from %ir.31 + 6, basealign 8, !tbaa !7)
+    %120:gprc = LHZ 102, %27 :: (load (s16) from %ir.32 + 6, basealign 8, !tbaa !7)
+    %121:gprc = LBZ 35, %28 :: (load (s8) from %ir.29 + 3, basealign 4, !tbaa !9)
+    %122:gprc = LBZ 51, %28 :: (load (s8) from %ir.30 + 3, basealign 4, !tbaa !9)
+    %123:gprc = MULLI killed %121, -6
+    %124:gprc = MULLI killed %122, -6
+    %31:gprc = SUBF %2, %21
+    %32:gprc = SUBF %2, %22
+    %33:gprc = AND %23, killed %119
+    %34:gprc = AND %24, killed %120
+    %35:gprc = ADD4 killed %123, %25
+    %36:gprc = ADD4 killed %124, %26
+    BDNZ8 %bb.5, implicit-def $ctr8, implicit $ctr8
+    B %bb.6
+  
+  bb.6.middle.block:
+    successors: %bb.8(0x40000000), %bb.7(0x40000000)
+  
+    %37:gprc = ADD4 %32, %31
+    %38:gprc = AND %34, %33
+    %39:gprc = ADD4 %36, %35
+    BCC 76, %126, %bb.8
+    B %bb.7
+  
+  bb.7.for.body31.preheader:
+    successors: %bb.10(0x80000000)
+  
+    %40:g8rc_and_g8rc_nox0 = PHI %6, %bb.3, %13, %bb.6
+    %41:gprc = PHI %18, %bb.3, %37, %bb.6
+    %42:gprc = PHI %19, %bb.3, %38, %bb.6
+    %43:gprc = PHI %20, %bb.3, %39, %bb.6
+    %127:g8rc = RLDICR %40, 2, 61
+    %128:g8rc = RLDICR %40, 3, 60
+    %130:g8rc_and_g8rc_nox0 = ADD8 %106, killed %128
+    %44:g8rc = ADDI8 killed %130, -32
+    %132:g8rc_and_g8rc_nox0 = ADD8 %108, killed %127
+    %45:g8rc = ADDI8 killed %132, 80
+    %133:crrc = CMPDI %40, 4
+    %135:g8rc = ISEL8 %40, %98, %133.sub_gt
+    %136:g8rc_and_g8rc_nox0 = SUBF8 %40, killed %135
+    %137:g8rc = ADDI8 killed %136, 3
+    %138:g8rc_and_g8rc_nox0 = RLDICL %137, 62, 2
+    %139:g8rc = nuw nsw ADDI8 killed %138, 1
+    MTCTR8loop killed %139, implicit-def dead $ctr8
+    B %bb.10
+  
+  bb.8.for.cond21.for.cond.cleanup25_crit_edge:
+    successors: %bb.9(0x80000000)
+  
+    %46:gprc = PHI %37, %bb.6, %60, %bb.10
+    %47:gprc = PHI %38, %bb.6, %61, %bb.10
+    %48:gprc = PHI %39, %bb.6, %62, %bb.10
+    STH %46, 0, %81 :: (store (s16) into @computedResultShort, !tbaa !3)
+    %145:g8rc_and_g8rc_nox0 = LDtoc @charArray, $x2 :: (load (s64) from got)
+    STB %0, 131, killed %145 :: (store (s8) into `ptr getelementptr inbounds ([8 x [8 x [8 x i8]]], ptr @charArray, i64 0, i64 2, i64 0, i64 3)`, !tbaa !11)
+    STH %47, 0, %82 :: (store (s16) into @resultArray, !tbaa !3)
+    %147:g8rc_and_g8rc_nox0 = LDtoc @computedResultBool, $x2 :: (load (s64) from got)
+    STB %9, 0, killed %147 :: (store (s8) into @computedResultBool, !tbaa !16)
+    STB %48, 0, %83 :: (store (s8) into @computedResultChar, !tbaa !11)
+  
+  bb.9.for.cond.cleanup25:
+    successors: %bb.1(0x04000000), %bb.2(0x7c000000)
+  
+    %49:gprc = PHI %20, %bb.2, %48, %bb.8
+    %50:gprc = PHI %19, %bb.2, %47, %bb.8
+    %51:gprc = PHI %18, %bb.2, %46, %bb.8
+    %52:gprc = nsw ADDI %17, -1
+    %149:crrc = CMPLWI %52, 0
+    BCC 76, killed %149, %bb.1
+    B %bb.2
+  
+  bb.10.for.body31:
+    successors: %bb.10(0x7c000000), %bb.8(0x04000000)
+  
+    %53:gprc = PHI %41, %bb.7, %60, %bb.10
+    %54:gprc = PHI %42, %bb.7, %61, %bb.10
+    %55:gprc = PHI %43, %bb.7, %62, %bb.10
+    %56:g8rc_and_g8rc_nox0 = PHI %44, %bb.7, %59, %bb.10
+    %57:g8rc_and_g8rc_nox0 = PHI %45, %bb.7, %58, %bb.10
+    %58:g8rc = ADDI8 %57, 16
+    %59:g8rc = ADDI8 %56, 32
+    %140:gprc = LHZ 38, %56 :: (load (s16) from %ir.62 + 6, basealign 8, !tbaa !7)
+    %141:gprc = LBZ 19, %57 :: (load (s8) from %ir.61 + 3, basealign 4, !tbaa !9)
+    %142:gprc = MULLI killed %141, -6
+    %60:gprc = SUBF %2, %53
+    %61:gprc = AND %54, killed %140
+    %62:gprc = ADD4 killed %142, %55
+    BDNZ8 %bb.10, implicit-def $ctr8, implicit $ctr8
+    B %bb.8
+
+...
+
+
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
index a2a5c6c5eafb7f..13cdcd9079fc70 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll
@@ -937,6 +937,8 @@ define i64 @setbsc3(i4 %a, i4 %b) {
 ; CHECK-PWR8-NEXT:    slwi r3, r3, 28
 ; CHECK-PWR8-NEXT:    srawi r4, r4, 28
 ; CHECK-PWR8-NEXT:    srawi r3, r3, 28
+; CHECK-PWR8-NEXT:    extsw r4, r4
+; CHECK-PWR8-NEXT:    extsw r3, r3
 ; CHECK-PWR8-NEXT:    cmpw r3, r4
 ; CHECK-PWR8-NEXT:    sub r5, r4, r3
 ; CHECK-PWR8-NEXT:    li r3, -1
diff --git a/llvm/test/CodeGen/PowerPC/select-constant-xor.ll b/llvm/test/CodeGen/PowerPC/select-constant-xor.ll
index b40a21b82e836b..52a763262f59b6 100644
--- a/llvm/test/CodeGen/PowerPC/select-constant-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/select-constant-xor.ll
@@ -42,6 +42,7 @@ define i64 @selecti32i64(i32 %a) {
 ; CHECK-LABEL: selecti32i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    srawi 3, 3, 31
+; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    xori 3, 3, 65535
 ; CHECK-NEXT:    xoris 3, 3, 32767
 ; CHECK-NEXT:    blr
@@ -68,6 +69,7 @@ define i32 @selecti32i32(i32 %a) {
 ; CHECK-LABEL: selecti32i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    srawi 3, 3, 31
+; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    xori 3, 3, 84
 ; CHECK-NEXT:    blr
   %c = icmp sgt i32 %a, -1
@@ -79,6 +81,7 @@ define i8 @selecti32i8(i32 %a) {
 ; CHECK-LABEL: selecti32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    srawi 3, 3, 31
+; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    xori 3, 3, 84
 ; CHECK-NEXT:    blr
   %c = icmp sgt i32 %a, -1
@@ -91,6 +94,7 @@ define i32 @selecti8i32(i8 %a) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    extsb 3, 3
 ; CHECK-NEXT:    srawi 3, 3, 7
+; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    xori 3, 3, 84
 ; CHECK-NEXT:    blr
   %c = icmp sgt i8 %a, -1
diff --git a/llvm/test/CodeGen/PowerPC/sext_elimination.mir b/llvm/test/CodeGen/PowerPC/sext_elimination.mir
index e920848a4137cd..bf6b9005fcf7f2 100644
--- a/llvm/test/CodeGen/PowerPC/sext_elimination.mir
+++ b/llvm/test/CodeGen/PowerPC/sext_elimination.mir
@@ -41,8 +41,14 @@ body:             |
     ; CHECK: %4:g8rc = EXTSW_32_64 killed %3
     ; CHECK: %5:g8rc = INSERT_SUBREG %15, %1, %subreg.sub_32
     ; CHECK: %7:g8rc = EXTSW_32_64 killed %6
-    ; CHECK: %9:g8rc = INSERT_SUBREG %16, %8, %subreg.sub_32
-    ; CHECK: %11:g8rc = INSERT_SUBREG %17, %10, %subreg.sub_32
+    ; CHECK: %17:g8rc = INSERT_SUBREG %16, %1, %subreg.sub_32
+    ; CHECK-NEXT: %18:g8rc = ORIS8 killed %17, 32767
+    ; CHECK-NEXT: %8:gprc = COPY killed %18.sub_32
+    ; CHECK: %9:g8rc = INSERT_SUBREG %19, %8, %subreg.sub_32
+    ; CHECK: %21:g8rc = INSERT_SUBREG %20, %1, %subreg.sub_32
+    ; CHECK-NEXT: %22:g8rc = ORI8 killed %21, 32768
+    ; CHECK-NEXT: %10:gprc = COPY killed %22.sub_32
+    ; CHECK: %11:g8rc = INSERT_SUBREG %23, %10, %subreg.sub_32
     ; CHECK: %14:g8rc = COPY killed %13
 
     %0:g8rc_nox0 = COPY $x3
diff --git a/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
index c8278e58ad064c..9d0c705ba53bd8 100644
--- a/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll
@@ -19,9 +19,10 @@ define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar
 ; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $x3
 ; CHECK-NEXT:    cmpwi 2, 3, 2
 ; CHECK-NEXT:    li 4, 0
+; CHECK-NEXT:    # kill: def $r4 killed $r4 killed $x4
+; CHECK-NEXT:    mr 3, 4
 ; CHECK-NEXT:    std 0, 800(1)
 ; CHECK-NEXT:    mr 31, 1
-; CHECK-NEXT:    mr 3, 4
 ; CHECK-NEXT:    blt 2, .LBB0_3
 ; CHECK-NEXT:  # %bb.1: # %if.end
 ; CHECK-NEXT:    addi 3, 31, 112
@@ -67,6 +68,7 @@ define dso_local signext i32 @main(i32 signext %argc, ptr nocapture readnone %ar
 ; BE-NEXT:    stdu 1, -800(1)
 ; BE-NEXT:    li 4, 0
 ; BE-NEXT:    # kill: def $r3 killed $r3 killed $x3
+; BE-NEXT:    # kill: def $r4 killed $r4 killed $x4
 ; BE-NEXT:    cmpwi 2, 3, 2
 ; BE-NEXT:    mr 3, 4
 ; BE-NEXT:    std 0, 816(1)
diff --git a/llvm/test/CodeGen/PowerPC/store-forward-be64.ll b/llvm/test/CodeGen/PowerPC/store-forward-be64.ll
index 32e67c7ce127a1..720fec96eddc26 100644
--- a/llvm/test/CodeGen/PowerPC/store-forward-be64.ll
+++ b/llvm/test/CodeGen/PowerPC/store-forward-be64.ll
@@ -51,6 +51,7 @@ define signext i32 @stc1(ptr noundef byval(%struct.SST) align 8 %s) {
 ; CHECK-NEXT:    std 4, 48(1)
 ; CHECK-NEXT:    extsh 3, 3
 ; CHECK-NEXT:    srawi 3, 3, 8
+; CHECK-NEXT:    extsw 3, 3
 ; CHECK-NEXT:    blr
 entry:
   %0 = load i16, ptr %s, align 8

>From bdba8c4a393c7e8e9464ae5690830f887c2390a2 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 19 Mar 2024 10:13:52 -0400
Subject: [PATCH 2/4] adding support PPC::COPY

---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index b260565d64fbbf..124474aeb9bd54 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5224,7 +5224,8 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
                                                   MachineRegisterInfo *MRI,
                                                   unsigned BinOpDepth,
                                                   LiveVariables *LV) const {
-  if (MRI->getRegClass(Reg) == &PPC::G8RCRegClass)
+  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+  if (RC == &PPC::G8RCRegClass || RC == &PPC::GPRC_and_GPRC_NOR0RegClass)
     return;
 
   MachineInstr *MI = MRI->getVRegDef(Reg);
@@ -5240,6 +5241,7 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
   case PPC::ISEL:
     if (BinOpDepth < MAX_BINOP_DEPTH) {
       if (Opcode == PPC::OR || Opcode == PPC::ISEL)
+        // if (Opcode == PPC::OR)
         IsRelplaceIntr = true;
       unsigned OperandEnd = 3, OperandStride = 1;
       if (Opcode == PPC::PHI) {
@@ -5254,15 +5256,22 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
       }
     }
     break;
-    /*
   case PPC::COPY: {
     Register SrcReg = MI->getOperand(1).getReg();
     const MachineFunction *MF = MI->getMF();
     if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
       replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
+      break;
     }
+    // From here on everything is SVR4ABI
+    if (MI->getParent()->getBasicBlock() == &MF->getFunction().getEntryBlock())
+      break;
 
-  } break;*/
+    if (SrcReg != PPC::X3) {
+      replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
+      break;
+    }
+  } break;
   case PPC::ORI:
   case PPC::XORI:
   case PPC::ORI8:

>From 680bcf62d3ce13d7995fb0cae9ff1724349c59ae Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 19 Mar 2024 11:47:26 -0400
Subject: [PATCH 3/4] fix some comment grammar

---
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 52 +++++++++++++++---------
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 124474aeb9bd54..516a4e12c8347a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5224,10 +5224,6 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
                                                   MachineRegisterInfo *MRI,
                                                   unsigned BinOpDepth,
                                                   LiveVariables *LV) const {
-  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
-  if (RC == &PPC::G8RCRegClass || RC == &PPC::GPRC_and_GPRC_NOR0RegClass)
-    return;
-
   MachineInstr *MI = MRI->getVRegDef(Reg);
   if (!MI)
     return;
@@ -5240,9 +5236,6 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
   case PPC::PHI:
   case PPC::ISEL:
     if (BinOpDepth < MAX_BINOP_DEPTH) {
-      if (Opcode == PPC::OR || Opcode == PPC::ISEL)
-        // if (Opcode == PPC::OR)
-        IsRelplaceIntr = true;
       unsigned OperandEnd = 3, OperandStride = 1;
       if (Opcode == PPC::PHI) {
         OperandEnd = MI->getNumOperands();
@@ -5254,6 +5247,11 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
         Register SrcReg = MI->getOperand(I).getReg();
         replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth + 1, LV);
       }
+
+      if (Opcode == PPC::OR || Opcode == PPC::ISEL)
+        IsRelplaceIntr = true;
+      else
+        return;
     }
     break;
   case PPC::COPY: {
@@ -5261,17 +5259,18 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
     const MachineFunction *MF = MI->getMF();
     if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
       replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
-      break;
+      return;
     }
     // From here on everything is SVR4ABI
     if (MI->getParent()->getBasicBlock() == &MF->getFunction().getEntryBlock())
-      break;
+      return;
 
     if (SrcReg != PPC::X3) {
       replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
-      break;
+      return;
     }
-  } break;
+  }
+    return;
   case PPC::ORI:
   case PPC::XORI:
   case PPC::ORI8:
@@ -5280,22 +5279,27 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
   case PPC::XORIS:
   case PPC::ORIS8:
   case PPC::XORIS8: {
+    Register SrcReg = MI->getOperand(1).getReg();
+    replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
+
     if (Opcode == PPC::ORI || Opcode == PPC::XORI || Opcode == PPC::ORIS ||
         Opcode == PPC::ORIS || Opcode == PPC::XORIS)
       IsRelplaceIntr = true;
-    Register SrcReg = MI->getOperand(1).getReg();
-    replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
+    else
+      return;
     break;
   }
   case PPC::AND:
   case PPC::AND8: {
     if (BinOpDepth < MAX_BINOP_DEPTH) {
-      if (Opcode == PPC::AND)
-        IsRelplaceIntr = true;
       Register SrcReg1 = MI->getOperand(1).getReg();
       replaceInstrAfterElimExt32To64(SrcReg1, MRI, BinOpDepth, LV);
       Register SrcReg2 = MI->getOperand(2).getReg();
       replaceInstrAfterElimExt32To64(SrcReg2, MRI, BinOpDepth, LV);
+      if (Opcode == PPC::AND)
+        IsRelplaceIntr = true;
+      else
+        return;
     }
     break;
   }
@@ -5309,9 +5313,14 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
        !isOpZeroOfSubwordPreincLoad(Opcode)) ||
       IsRelplaceIntr) {
 
-    // Fix Me: Most of the opcode of 64-bit instruction equal to the opcode of
-    // 32-bit version of same instruction plus one. But there are some
-    // exception: PPC::ANDC_rec, PPC::ANDI_rec, PPC::ANDIS_rec.
+    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+    assert(RC != &PPC::G8RCRegClass && RC != &PPC::G8RC_and_G8RC_NOX0RegClass &&
+           "Must be 32-bit Register!");
+
+    // FIXME: Most pseudo-opcodes of 64-bit instructions are equal to the
+    // pseudo-opcode of the 32-bit version of the same instruction plus
+    // one. However, there are some exceptions: PPC::ANDC_rec,
+    // PPC::ANDI_rec, PPC::ANDIS_rec.
     unsigned NewOpcode = Opcode + 1;
 
     if (Opcode == PPC::ANDC_rec)
@@ -5335,8 +5344,11 @@ void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
     DebugLoc DL = MI->getDebugLoc();
     auto MBB = MI->getParent();
 
-    // If the oprand of the instruction is Register which isPPC::GRCRegClass, we
-    // need to promot the Oprande to PPC::G8RCRegClass.
+    // Since the pseudo-opcode of the instruction is promoted from 32-bit to
+    // 64-bit, if the operand of the original instruction belongs to
+    // PPC::GPRCRegClass or PPC::GPRC_and_GPRC_NOR0RegClass, we need to promote
+    // the operand to PPC::G8RCRegClass or PPC::G8RC_and_G8RC_NOX0RegClass,
+    // respectively.
     DenseMap<unsigned, Register> PromoteRegs;
     DenseMap<unsigned, Register> ReCalRegs;
     for (unsigned i = 1; i < MI->getNumOperands(); i++) {

>From 5b5e3c706219eaeea41ab317ebc42562e933f8f0 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Tue, 19 Mar 2024 13:39:07 -0400
Subject: [PATCH 4/4] add checking isel8 in test case

---
 .../peephole-replaceInstr-after-eliminate-extsw.mir       | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir b/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir
index 1b54ba7a38b816..17bfb4c19241ac 100644
--- a/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir
+++ b/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir
@@ -546,14 +546,18 @@ body:             |
     %151:gprc_and_gprc_nor0 = EXTSH %0
 
     ; CHECK:      %159:g8rc = IMPLICIT_DEF
-    ; CHECK-NEXT: %160:g8rc = INSERT_SUBREG %159, %0, %subreg.sub_32 
+    ; CHECK-NEXT: %160:g8rc = INSERT_SUBREG %159, %0, %subreg.sub_32
     ; CHECK-NEXT: %161:g8rc = EXTSH8 killed %160
     ; CHECK-NEXT: %151:gprc_and_gprc_nor0 = COPY killed %161.sub_32
 
     %152:crrc = CMPW %151, %150
     %153:gprc = ISEL %151, %150, %152.sub_lt
     %154:g8rc = EXTSW_32_64 killed %153
-    ; CHECK-NOT: %154:g8rc = EXTSW_32_64 killed %153 
+    ; CHECK:      %165:g8rc = IMPLICIT_DEF
+    ; CHECK-NEXT: %166:g8rc = INSERT_SUBREG %165, %150, %subreg.sub_32
+    ; CHECK-NEXT: %167:g8rc = ISEL8 killed %164, killed %166, %152.sub_lt
+    ; CHECK-NEXT: %153:gprc = COPY killed %167.sub_32
+    ; CHECK-NOT: %154:g8rc = EXTSW_32_64 killed %153
     %155:g8rc_and_g8rc_nox0 = LDtoc @computedResultUll, $x2 :: (load (s64) from got)
     STD %154, 0, killed %155 :: (store (s64) into @computedResultUll, !tbaa !7)
     ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1



More information about the llvm-commits mailing list