[llvm] first implement of fixing issue 71030 (PR #85451)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 15 12:15:53 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
Author: zhijian lin (diggerlin)
<details>
<summary>Changes</summary>
---
Patch is 49.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/85451.diff
12 Files Affected:
- (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.cpp (+135)
- (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.h (+5)
- (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.td (+2-3)
- (modified) llvm/lib/Target/PowerPC/PPCMIPeephole.cpp (+1)
- (modified) llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir (+1-1)
- (modified) llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir (+4-4)
- (added) llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir (+698)
- (modified) llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll (+2)
- (modified) llvm/test/CodeGen/PowerPC/select-constant-xor.ll (+4)
- (modified) llvm/test/CodeGen/PowerPC/sext_elimination.mir (+8-2)
- (modified) llvm/test/CodeGen/PowerPC/stack-restore-with-setjmp.ll (+3-1)
- (modified) llvm/test/CodeGen/PowerPC/store-forward-be64.ll (+1)
``````````diff
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 5f5eb31a5a85fa..0e9bdaf37d079d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -5219,6 +5219,141 @@ bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
// We limit the max depth to track incoming values of PHIs or binary ops
// (e.g. AND) to avoid excessive cost.
const unsigned MAX_BINOP_DEPTH = 1;
+
+/// Replace the 32-bit instruction that defines \p Reg, and the 32-bit
+/// instructions feeding it, with 64-bit instructions.
+///
+/// PPCMIPeephole calls this right before it removes an EXTSW whose input
+/// \p Reg is known to be sign-extended already.
+///
+/// The definition chain of \p Reg is visited first:
+///  - OR/OR8/ISEL: operands 1 and 2; PHI: every incoming value. AND/AND8:
+///    operands 1 and 2. These fan out into several operands, so they are only
+///    followed while \p BinOpDepth is below MAX_BINOP_DEPTH and each step
+///    recurses with BinOpDepth + 1.
+///  - ORI/XORI/ORIS/XORIS and their 8 variants: operand 1, same depth.
+///
+/// Afterwards the defining instruction itself is replaced when it is one of
+/// the 32-bit logical opcodes above, or when its opcode is flagged
+/// SExt32To64 but not ZExt32To64. The replacement defines a fresh G8RC
+/// register, and a COPY of its sub_32 part re-defines the original register
+/// so existing users are untouched.
+///
+/// \param Reg        Register whose defining instructions are promoted.
+///                   Non-virtual registers and registers in G8RC are ignored.
+/// \param MRI        Register info used to look up definitions and to create
+///                   the new virtual registers.
+/// \param BinOpDepth Current fan-out depth; top-level callers pass 0.
+/// \param LV         Live-variable analysis, recomputed for every register
+///                   used or defined by the replacement. Must not be null.
+void PPCInstrInfo::replaceInstrAfterElimExt32To64(const Register &Reg,
+                                                  MachineRegisterInfo *MRI,
+                                                  unsigned BinOpDepth,
+                                                  LiveVariables *LV) const {
+  // The recursion below forwards raw instruction operands, which are not
+  // guaranteed to be virtual; register classes are only tracked for virtual
+  // registers, so bail out before querying one.
+  if (!Reg.isVirtual())
+    return;
+
+  // Nothing to promote when the register is already in the 64-bit class.
+  if (MRI->getRegClass(Reg) == &PPC::G8RCRegClass)
+    return;
+
+  MachineInstr *MI = MRI->getVRegDef(Reg);
+  if (!MI)
+    return;
+
+  const unsigned Opcode = MI->getOpcode();
+  // Set for the 32-bit logical opcodes handled below; they are replaced in
+  // addition to the opcodes selected by the SExt32To64 test further down.
+  bool IsReplaceInstr = false;
+  switch (Opcode) {
+  case PPC::OR:
+  case PPC::OR8:
+  case PPC::PHI:
+  case PPC::ISEL:
+    if (BinOpDepth < MAX_BINOP_DEPTH) {
+      if (Opcode == PPC::OR)
+        IsReplaceInstr = true;
+      // PHI operands come in (value, block) pairs, so only every second
+      // operand is a value; the other opcodes have two register inputs.
+      unsigned OperandEnd = 3, OperandStride = 1;
+      if (Opcode == PPC::PHI) {
+        OperandEnd = MI->getNumOperands();
+        OperandStride = 2;
+      }
+
+      for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
+        assert(MI->getOperand(I).isReg() && "Operand must be register");
+        Register SrcReg = MI->getOperand(I).getReg();
+        replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth + 1, LV);
+      }
+    }
+    break;
+  case PPC::ORI:
+  case PPC::XORI:
+  case PPC::ORI8:
+  case PPC::XORI8:
+  case PPC::ORIS:
+  case PPC::XORIS:
+  case PPC::ORIS8:
+  case PPC::XORIS8: {
+    if (Opcode == PPC::ORI || Opcode == PPC::XORI || Opcode == PPC::ORIS ||
+        Opcode == PPC::XORIS)
+      IsReplaceInstr = true;
+    // Single register input: the walk does not fan out, so the depth is kept.
+    Register SrcReg = MI->getOperand(1).getReg();
+    replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth, LV);
+    break;
+  }
+  case PPC::AND:
+  case PPC::AND8: {
+    if (BinOpDepth < MAX_BINOP_DEPTH) {
+      if (Opcode == PPC::AND)
+        IsReplaceInstr = true;
+      // Two register inputs: count this as a fan-out step, exactly like the
+      // OR/PHI/ISEL case, so the walk stays bounded by MAX_BINOP_DEPTH.
+      for (unsigned I = 1; I <= 2; ++I) {
+        Register SrcReg = MI->getOperand(I).getReg();
+        replaceInstrAfterElimExt32To64(SrcReg, MRI, BinOpDepth + 1, LV);
+      }
+    }
+    break;
+  }
+  default:
+    break;
+  }
+
+  const PPCInstrInfo *TII =
+      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
+  const bool IsSExtOnly =
+      TII->isSExt32To64(Opcode) && !TII->isZExt32To64(Opcode);
+  if (!IsSExtOnly && !IsReplaceInstr)
+    return;
+
+  DebugLoc DL = MI->getDebugLoc();
+  MachineBasicBlock *MBB = MI->getParent();
+
+  // Record, per operand index, the register the 64-bit instruction will use.
+  // An operand in PPC::GPRCRegClass is promoted to PPC::G8RCRegClass first:
+  // an IMPLICIT_DEF'd 64-bit register receives the 32-bit value in its
+  // sub_32 part through INSERT_SUBREG. Other virtual registers are reused.
+  DenseMap<unsigned, Register> PromoteRegs;
+  for (unsigned I = 1, E = MI->getNumOperands(); I < E; ++I) {
+    MachineOperand &Operand = MI->getOperand(I);
+    if (!Operand.isReg())
+      continue;
+    Register OperandReg = Operand.getReg();
+    if (!OperandReg.isVirtual())
+      continue;
+    if (MRI->getRegClass(OperandReg) != &PPC::GPRCRegClass) {
+      PromoteRegs[I] = OperandReg;
+      continue;
+    }
+
+    Register TmpReg = MRI->createVirtualRegister(&PPC::G8RCRegClass);
+    Register DstTmpReg = MRI->createVirtualRegister(&PPC::G8RCRegClass);
+    BuildMI(*MBB, MI, DL, TII->get(PPC::IMPLICIT_DEF), TmpReg);
+    BuildMI(*MBB, MI, DL, TII->get(PPC::INSERT_SUBREG), DstTmpReg)
+        .addReg(TmpReg)
+        .addReg(OperandReg)
+        .addImm(PPC::sub_32);
+    PromoteRegs[I] = DstTmpReg;
+  }
+
+  Register NewReg = MRI->createVirtualRegister(&PPC::G8RCRegClass);
+  Register DefReg = MI->getOperand(0).getReg();
+
+  // For most instructions the opcode of the 64-bit version equals the opcode
+  // of the 32-bit version plus one. The exceptions are PPC::ANDC_rec,
+  // PPC::ANDI_rec and PPC::ANDIS_rec.
+  // NOTE(review): the "+ 1" rule depends on the generated opcode numbering;
+  // confirm it holds for every opcode that can reach this point.
+  unsigned NewOpcode;
+  switch (Opcode) {
+  case PPC::ANDC_rec:
+    NewOpcode = PPC::ANDC8_rec;
+    break;
+  case PPC::ANDI_rec:
+    NewOpcode = PPC::ANDI8_rec;
+    break;
+  case PPC::ANDIS_rec:
+    NewOpcode = PPC::ANDIS8_rec;
+    break;
+  default:
+    NewOpcode = Opcode + 1;
+    break;
+  }
+
+  // Build the 64-bit instruction in front of MI and give it MI's operands,
+  // swapping in the promoted registers. The kill flags set here are
+  // provisional; LiveVariables is recomputed for these registers below.
+  MachineInstrBuilder NewMI =
+      BuildMI(*MBB, MI, DL, TII->get(NewOpcode), NewReg);
+  for (unsigned I = 1, E = MI->getNumOperands(); I < E; ++I) {
+    auto Promoted = PromoteRegs.find(I);
+    if (Promoted != PromoteRegs.end())
+      NewMI.addReg(Promoted->second, RegState::Kill);
+    else
+      NewMI->addOperand(MI->getOperand(I));
+  }
+
+  for (const auto &Entry : PromoteRegs)
+    LV->recomputeForSingleDefVirtReg(Entry.second);
+
+  // Remember the position right after MI, then drop MI and re-define its
+  // result there as the low 32 bits of the new 64-bit value.
+  MachineBasicBlock::instr_iterator InsertPt(MI);
+  ++InsertPt;
+  MI->eraseFromParent();
+  BuildMI(*MBB, InsertPt, DL, TII->get(PPC::COPY), DefReg)
+      .addReg(NewReg, RegState::Kill, PPC::sub_32);
+  LV->recomputeForSingleDefVirtReg(NewReg);
+}
+
// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
// does not count all of the recursions. The parameter BinOpDepth is incremented
// only when isSignOrZeroExtended calls itself more than once. This is done to
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 045932dc0d3ba1..f6e79707913c7b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -17,6 +17,7 @@
#include "PPC.h"
#include "PPCRegisterInfo.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
@@ -610,6 +611,10 @@ class PPCInstrInfo : public PPCGenInstrInfo {
const MachineRegisterInfo *MRI) const {
return isSignOrZeroExtended(Reg, 0, MRI).second;
}
+ void replaceInstrAfterElimExt32To64(const Register &Reg,
+ MachineRegisterInfo *MRI,
+ unsigned BinOpDepth,
+ LiveVariables *LV) const;
bool convertToImmediateForm(MachineInstr &MI,
SmallSet<Register, 4> &RegsToUpdate,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 82da1a3c305983..7c94add841402a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2408,7 +2408,7 @@ defm SRW : XForm_6r<31, 536, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
[(set i32:$RA, (PPCsrl i32:$RST, i32:$RB))]>, ZExt32To64;
defm SRAW : XForm_6rc<31, 792, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
"sraw", "$RA, $RST, $RB", IIC_IntShift,
- [(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>, SExt32To64;
+ [(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>;
}
def : InstAlias<"mr $rA, $rB", (OR gprc:$rA, gprc:$rB, gprc:$rB)>;
@@ -2423,8 +2423,7 @@ let PPC970_Unit = 1 in { // FXU Operations.
let hasSideEffects = 0 in {
defm SRAWI : XForm_10rc<31, 824, (outs gprc:$RA), (ins gprc:$RST, u5imm:$RB),
"srawi", "$RA, $RST, $RB", IIC_IntShift,
- [(set i32:$RA, (sra i32:$RST, (i32 imm:$RB)))]>,
- SExt32To64;
+ [(set i32:$RA, (sra i32:$RST, (i32 imm:$RB)))]>;
defm CNTLZW : XForm_11r<31, 26, (outs gprc:$RA), (ins gprc:$RST),
"cntlzw", "$RA, $RST", IIC_IntGeneral,
[(set i32:$RA, (ctlz i32:$RST))]>, ZExt32To64;
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 494e4b52a5b5eb..76b9c19db2b3eb 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -1037,6 +1037,7 @@ bool PPCMIPeephole::simplifyCode() {
TII->isSignExtended(NarrowReg, MRI)) {
// We can eliminate EXTSW if the input is known to be already
// sign-extended.
+ TII->replaceInstrAfterElimExt32To64(NarrowReg, MRI, 0, LV);
LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
Register TmpReg =
MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
index dfbf412a939212..bcc1d29a3f6ea3 100644
--- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
+++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir
@@ -604,7 +604,7 @@ body: |
%2 = LI 48
%5 = COPY %0.sub_32
%8 = SRW killed %5, killed %2
- ; CHECK: LI 0
+ ; CHECK: LI8 0
; CHECK-LATE: li 3, 0
$x3 = EXTSW_32_64 %8
BLR8 implicit $lr8, implicit $rm, implicit $x3
diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
index 761316ed7726d7..f095ffa85f02db 100644
--- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
+++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir
@@ -1348,7 +1348,7 @@ body: |
%1 = LI 77
%2 = ADDI killed %1, 44
%3 = EXTSW_32_64 killed %2
- ; CHECK: LI 121
+ ; CHECK: LI8 121
; CHECK-LATE: li 3, 121
$x3 = COPY %3
BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -3573,7 +3573,7 @@ body: |
%0 = LI 777
%1 = ORI %0, 88
- ; CHECK: LI 857
+ ; CHECK: LI8 857
; CHECK-LATE: li 3, 857
$x3 = EXTSW_32_64 %1
BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -4145,7 +4145,7 @@ body: |
%3 = IMPLICIT_DEF
%2 = LI 17
%4 = RLWINM killed %2, 4, 20, 27
- ; CHECK: LI 272
+ ; CHECK: LI8 272
; CHECK-LATE: li 3, 272
$x3 = EXTSW_32_64 %4
BLR8 implicit $lr8, implicit $rm, implicit $x3
@@ -6456,7 +6456,7 @@ body: |
%0 = LI 871
%1 = XORI %0, 17
- ; CHECK: LI 886
+ ; CHECK: LI8 886
; CHECK-LATE: li 3, 886
$x3 = EXTSW_32_64 %1
BLR8 implicit $lr8, implicit $rm, implicit $x3
diff --git a/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir b/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir
new file mode 100644
index 00000000000000..1b54ba7a38b816
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir
@@ -0,0 +1,698 @@
+# RUN: llc -run-pass=ppc-mi-peepholes -mtriple powerpc64-ibm-aix-xcoff %s -o - \
+# RUN: -verify-machineinstrs | FileCheck %s
+
+--- |
+ ; ModuleID = '71030_tmp_reduce-O2.ll'
+ source_filename = "71030_tmp_reduce.c"
+ target datalayout = "E-m:a-Fi64-i64:64-n32:64-S128-v256:256:256-v512:512:512"
+ target triple = "powerpc64-ibm-aix-xcoff"
+
+ @globalShortValue = local_unnamed_addr global i16 1, align 2
+ @globalCharValue = local_unnamed_addr global i8 0, align 1
+ @largeNumber = local_unnamed_addr global i64 -3664682556119382352, align 8
+ @someIntValue = local_unnamed_addr global i32 378441747, align 4
+ @unitIncrement = local_unnamed_addr global i32 1, align 4
+ @computedResultUll = local_unnamed_addr global i64 0, align 8
+ @computedResultShort = local_unnamed_addr global i16 0, align 2
+ @computedResultUChar = local_unnamed_addr global i8 0, align 1
+ @computedResultBool = local_unnamed_addr global i8 0, align 1
+ @computedResultChar = local_unnamed_addr global i8 0, align 1
+ @shortArray = local_unnamed_addr global [8 x i16] zeroinitializer, align 2
+ @charArray = local_unnamed_addr global [8 x [8 x [8 x i8]]] zeroinitializer, align 1
+ @longArray = local_unnamed_addr global [8 x [8 x i64]] zeroinitializer, align 8
+ @resultArray = local_unnamed_addr global [8 x [8 x i16]] zeroinitializer, align 2
+ @ullArray = local_unnamed_addr global [8 x i64] zeroinitializer, align 8
+ @intArray = local_unnamed_addr global [8 x [8 x [8 x i32]]] zeroinitializer, align 4
+ @_MergedGlobals = private constant <{ [29 x i8], [46 x i8] }> <{ [29 x i8] c"Computed Result (ULL): %llx\0A\00", [46 x i8] c"Computed convert largeNumber&&&& (ULL): %llx\0A\00" }>, align 1
+
+ @.str.1 = private alias [29 x i8], ptr @_MergedGlobals
+ @.str = private alias [46 x i8], getelementptr inbounds (<{ [29 x i8], [46 x i8] }>, ptr @_MergedGlobals, i32 0, i32 1)
+
+ ; Function Attrs: nofree nounwind
+ define noundef signext i32 @main() local_unnamed_addr #0 {
+ entry:
+ store i16 -1, ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 3), align 2, !tbaa !3
+ %0 = load i64, ptr @largeNumber, align 8, !tbaa !7
+ %conv = trunc i64 %0 to i32
+ %sext = shl i32 %conv, 16
+ %conv1 = ashr exact i32 %sext, 16
+ %sub = add nsw i32 %conv1, -1705
+ %call = tail call signext i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) getelementptr inbounds (<{ [29 x i8], [46 x i8] }>, ptr @_MergedGlobals, i32 0, i32 1), i32 noundef signext %sub)
+ %1 = load i16, ptr @globalShortValue, align 2, !tbaa !3
+ %2 = load i32, ptr @someIntValue, align 4, !tbaa !9
+ %3 = trunc i32 %2 to i8
+ %conv20 = add i8 %3, -19
+ %4 = load i32, ptr @unitIncrement, align 4
+ %5 = load i8, ptr @globalCharValue, align 1
+ %conv45 = sext i8 %5 to i32
+ %computedResultShort.promoted = load i16, ptr @computedResultShort, align 2, !tbaa !3
+ %resultArray.promoted = load i16, ptr @resultArray, align 2, !tbaa !3
+ %computedResultChar.promoted149 = load i8, ptr @computedResultChar, align 1, !tbaa !11
+ %6 = sext i8 %conv20 to i64
+ %7 = load i16, ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 3), align 2, !tbaa !3
+ %8 = load i16, ptr getelementptr inbounds ([8 x i16], ptr @shortArray, i64 0, i64 2), align 2
+ %conv46 = sext i16 %8 to i32
+ %cond54 = tail call i32 @llvm.smin.i32(i32 %conv45, i32 %conv46)
+ %tobool = icmp ne i32 %cond54, 0
+ %conv55 = zext i1 %tobool to i8
+ %9 = load i64, ptr getelementptr inbounds ([8 x i64], ptr @ullArray, i64 0, i64 3), align 8
+ %tobool72 = icmp ne i64 %9, 0
+ %frombool = zext i1 %tobool72 to i8
+ %smax = tail call i64 @llvm.smax.i64(i64 %6, i64 4)
+ %10 = add nuw nsw i64 %smax, 3
+ %11 = sub i64 %10, %6
+ %12 = lshr i64 %11, 2
+ %13 = add nuw nsw i64 %12, 1
+ %n.vec = and i64 %13, 9223372036854775806
+ %14 = shl i64 %n.vec, 2
+ %ind.end = add i64 %14, %6
+ %15 = shl i64 %6, 2
+ %16 = shl i64 %6, 3
+ %17 = add nsw i64 %16, -64
+ %scevgep30 = getelementptr i8, ptr @longArray, i64 %17
+ %18 = add nsw i64 %15, 64
+ %scevgep31 = getelementptr i8, ptr @intArray, i64 %18
+ %19 = lshr i64 %13, 1
+ %20 = shl nuw nsw i64 %19, 1
+ %21 = add nsw i64 %20, -2
+ %22 = lshr i64 %21, 1
+ %23 = add nuw i64 %22, 1
+ br label %for.body16
+
+ for.cond.cleanup15: ; preds = %for.cond.cleanup25
+ %24 = tail call i16 @llvm.smin.i16(i16 %1, i16 %7)
+ %conv11.le = sext i16 %24 to i64
+ store i64 %conv11.le, ptr @computedResultUll, align 8, !tbaa !7
+ %call97 = tail call signext i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @_MergedGlobals, i64 noundef %conv11.le)
+ ret i32 0
+
+ for.body16: ; preds = %for.cond.cleanup25, %entry
+ %lsr.iv29 = phi i32 [ %lsr.iv.next, %for.cond.cleanup25 ], [ 8, %entry ]
+ %conv36.lcssa132140 = phi i16 [ %computedResultShort.promoted, %entry ], [ %conv36.lcssa131, %for.cond.cleanup25 ]
+ %and.lcssa135139 = phi i16 [ %resultArray.promoted, %entry ], [ %and.lcssa134, %for.cond.cleanup25 ]
+ %conv81118.lcssa.lcssa137138 = phi i8 [ %computedResultChar.promoted149, %entry ], [ %conv81118.lcssa.lcssa136, %for.cond.cleanup25 ]
+ %25 = icmp slt i8 %conv20, 8
+ br i1 %25, label %for.body31.lr.ph, label %for.cond.cleanup25
+
+ for.body31.lr.ph: ; preds = %for.body16
+ %26 = icmp ult i64 %11, 4
+ store i8 %conv55, ptr @computedResultUChar, align 1, !tbaa !11
+ br i1 %26, label %for.body31.preheader, label %vector.body.preheader
+
+ vector.body.preheader: ; preds = %for.body31.lr.ph
+ call void @llvm.set.loop.iterations.i64(i64 %23)
+ br label %vector.body
+
+ vector.body: ; preds = %vector.body.preheader, %vector.body
+ %vec.phi = phi i16 [ %44, %vector.body ], [ %conv36.lcssa132140, %vector.body.preheader ]
+ %vec.phi159 = phi i16 [ %45, %vector.body ], [ 0, %vector.body.preheader ]
+ %vec.phi160 = phi i16 [ %46, %vector.body ], [ %and.lcssa135139, %vector.body.preheader ]
+ %vec.phi161 = phi i16 [ %47, %vector.body ], [ -1, %vector.body.preheader ]
+ %vec.phi162 = phi i8 [ %48, %vector.body ], [ %conv81118.lcssa.lcssa137138, %vector.body.preheader ]
+ %vec.phi163 = phi i8 [ %49, %vector.body ], [ 0, %vector.body.preheader ]
+ %27 = phi ptr [ %scevgep30, %vector.body.preheader ], [ %31, %vector.body ]
+ %28 = phi ptr [ %scevgep31, %vector.body.preheader ], [ %29, %vector.body ]
+ %29 = getelementptr i8, ptr %28, i64 32
+ %30 = getelementptr i8, ptr %29, i64 16
+ %31 = getelementptr i8, ptr %27, i64 64
+ %32 = getelementptr i8, ptr %31, i64 32
+ %33 = trunc i32 %4 to i16
+ %34 = load i64, ptr %31, align 8, !tbaa !7
+ %35 = load i64, ptr %32, align 8, !tbaa !7
+ %36 = trunc i64 %34 to i16
+ %37 = trunc i64 %35 to i16
+ %38 = load i32, ptr %29, align 4, !tbaa !9
+ %39 = load i32, ptr %30, align 4, !tbaa !9
+ %40 = trunc i32 %38 to i8
+ %41 = trunc i32 %39 to i8
+ %42 = mul i8 %40, -6
+ %43 = mul i8 %41, -6
+ %44 = sub i16 %vec.phi, %33
+ %45 = sub i16 %vec.phi159, %33
+ %46 = and i16 %vec.phi160, %36
+ %47 = and i16 %vec.phi161, %37
+ %48 = add i8 %42, %vec.phi162
+ %49 = add i8 %43, %vec.phi163
+ %50 = call i1 @llvm.loop.decrement.i64(i64 1)
+ br i1 %50, label %vector.body, label %middle.block, !llvm.loop !12
+
+ middle.block: ; preds = %vector.body
+ %51 = icmp eq i64 %13, %n.vec
+ %bin.rdx = add i16 %45, %44
+ %bin.rdx164 = and i16 %47, %46
+ %bin.rdx165 = add i8 %49, %48
+ br i1 %51, label %for.cond21.for.cond.cleanup25_crit_edge, label %for.body31.preheader
+
+ for.body31.preheader: ; preds = %middle.block, %for.body31.lr.ph
+ %indvars.iv.ph = phi i64 [ %6, %for.body31.lr.ph ], [ %ind.end, %middle.block ]
+ %conv36121128.ph = phi i16 [ %conv36.lcssa132140, %for.body31.lr.ph ], [ %bin.rdx, %middle.block ]
+ %and122127.ph = phi i16 [ %and.lcssa135139, %for.body31.lr.ph ], [ %bin.rdx164, %middle.block ]
+ %conv81118.lcssa124126.ph = phi i8 [ %conv81118.lcssa.lcssa137138, %for.body31.lr.ph ], [ %bin.rdx165, %middle.block ]
+ %52 = shl i64 %indvars.iv.ph, 2
+ %53 = shl i64 %indvars.iv.ph, 3
+ %scevgep = getelementptr i8, ptr getelementptr ([8 x [8 x i64]], ptr @longArray, i64 -1, i64 7, i64 4), i64 %53
+ %scevgep32 = getelementptr i8, ptr getelementptr inbounds ([8 x [8 x [8 x i32]]], ptr @intArray, i64 0, i64 0, i64 2, i64 4), i64 %52
+ %smax33 = call i64 @llvm.smax.i64(i64 %indvars.iv.ph, i64 4)
+ %54 = add i64 %smax33, 3
+ %55 = sub i64 %54, %indvars.iv.ph
+ %56 = lshr i64 %55, 2
+ %57 = add nuw nsw i64 %56, 1
+ call void @llvm.set.loop.iterations.i64(i64 %57)
+ br label %for.body31
+
+ for.cond21.for.cond.cleanup25_crit_edge: ; preds = %for.body31, %middle.block
+ %conv36.lcssa = phi i16 [ %bin.rdx, %middle.block ], [ %conv36, %for.body31 ]
+ %and.lcssa = phi i16 [ %bin.rdx164, %middle.block ], [ %and, %for.body31 ]
+ %.lcssa = phi i8 [ %bin.rdx165, %middle.block ], [ %67, %for.body31 ]
+ %58 = trunc i16 %1 to i8
+ store i16 %conv36.lcssa, ptr @computedResultShort, align 2, !tbaa !3
+ store i8 %58, ptr getelementptr inbounds ([8 x [8 x [8 x i8]]], ptr @charArray, i64 0, i64 2, i64 0, i64 3), align 1, !tbaa !11
+ store i16 %and.lcssa, ptr @resultArray, align 2, !tbaa !3
+ store i8 %frombool, ptr @computedResultBool, align 1, !tbaa !16
+ store i8 %.lcssa, ptr @computedResultChar, align...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/85451
More information about the llvm-commits
mailing list