[llvm] [Mips] Fix mfhi/mflo hazard miscompilation about div and mult (PR #91449)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 8 02:08:52 PDT 2024
https://github.com/yingopq updated https://github.com/llvm/llvm-project/pull/91449
>From 4e7d114c3223e414b59abc6dcfa582b55a606841 Mon Sep 17 00:00:00 2001
From: Ying Huang <ying.huang at oss.cipunited.com>
Date: Wed, 8 May 2024 04:50:41 -0400
Subject: [PATCH] [Mips] Fix mfhi/mflo hazard miscompilation about div and mult
Fix issue 1: mips1-4 require a minimum of 2 instructions between
a mflo/mfhi and the next mult/dmult/div/ddiv/divu/ddivu instruction.
Fix issue 2: On mips1-4, mflo must not be placed in the delay slot of
the return.
Fix #81291
---
llvm/lib/Target/Mips/Mips.h | 11 ++
llvm/lib/Target/Mips/MipsBranchExpansion.cpp | 41 +++++-
llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp | 6 +
llvm/lib/Target/Mips/MipsInstrInfo.cpp | 15 ++
llvm/lib/Target/Mips/MipsInstrInfo.h | 4 +
llvm/test/CodeGen/Mips/llvm-ir/sdiv.ll | 135 +++++++++++++++++-
.../Mips/llvm-ir/two-consecutive-div.ll | 70 +++++++++
.../Mips/llvm-ir/two-consecutive-mult.ll | 60 ++++++++
.../Mips/llvm-ir/two-consecutive-udiv.ll | 70 +++++++++
llvm/test/CodeGen/Mips/llvm-ir/udiv.ll | 131 ++++++++++++++++-
10 files changed, 531 insertions(+), 12 deletions(-)
create mode 100644 llvm/test/CodeGen/Mips/llvm-ir/two-consecutive-div.ll
create mode 100644 llvm/test/CodeGen/Mips/llvm-ir/two-consecutive-mult.ll
create mode 100644 llvm/test/CodeGen/Mips/llvm-ir/two-consecutive-udiv.ll
diff --git a/llvm/lib/Target/Mips/Mips.h b/llvm/lib/Target/Mips/Mips.h
index f0cf039928c17..32df648f21b65 100644
--- a/llvm/lib/Target/Mips/Mips.h
+++ b/llvm/lib/Target/Mips/Mips.h
@@ -17,6 +17,17 @@
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
+#define IsMFLOMFHI(instr) \
+ (instr == Mips::MFLO || instr == Mips::MFLO64 || instr == Mips::MFHI || \
+ instr == Mips::MFHI64)
+#define IsDIVMULT(instr) \
+ (instr == Mips::SDIV || instr == Mips::PseudoSDIV || instr == Mips::DSDIV || \
+ instr == Mips::PseudoDSDIV || instr == Mips::UDIV || \
+ instr == Mips::PseudoUDIV || instr == Mips::DUDIV || \
+ instr == Mips::PseudoDUDIV || instr == Mips::MULT || \
+ instr == Mips::PseudoMULT || instr == Mips::DMULT || \
+ instr == Mips::PseudoDMULT)
+// NOTE(review): both macros expand `instr` bare and repeatedly; pass simple values.
namespace llvm {
class FunctionPass;
class InstructionSelector;
diff --git a/llvm/lib/Target/Mips/MipsBranchExpansion.cpp b/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
index 721e525331c6c..a42fe39e40201 100644
--- a/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
+++ b/llvm/lib/Target/Mips/MipsBranchExpansion.cpp
@@ -167,6 +167,7 @@ class MipsBranchExpansion : public MachineFunctionPass {
bool handleFPUDelaySlot();
bool handleLoadDelaySlot();
bool handlePossibleLongBranch();
+ bool handleMFLO();
const MipsSubtarget *STI;
const MipsInstrInfo *TII;
@@ -744,13 +745,15 @@ static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) {
template <typename Pred, typename Safe>
bool MipsBranchExpansion::handleSlot(Pred Predicate, Safe SafeInSlot) {
bool Changed = false;
+ bool hasPendingMFLO = false;
for (MachineFunction::iterator FI = MFp->begin(); FI != MFp->end(); ++FI) {
for (Iter I = FI->begin(); I != FI->end(); ++I) {
// Delay slot hazard handling. Use lookahead over state.
- if (!Predicate(*I))
+ if (!Predicate(*I) && !hasPendingMFLO) {
continue;
+ }
Iter IInSlot;
bool LastInstInFunction =
@@ -766,10 +769,21 @@ bool MipsBranchExpansion::handleSlot(Pred Predicate, Safe SafeInSlot) {
if (std::next(Iit) == FI->end() ||
std::next(Iit)->getOpcode() != Mips::NOP) {
Changed = true;
- TII->insertNop(*(I->getParent()), std::next(I), I->getDebugLoc())
- ->bundleWithPred();
- NumInsertedNops++;
+ if (Predicate(*I) ||
+ (!LastInstInFunction && !SafeInSlot(*IInSlot, *I))) {
+ TII->insertNop(*(I->getParent()), std::next(I), I->getDebugLoc())
+ ->bundleWithPred();
+ NumInsertedNops++;
+ }
+ if (IsMFLOMFHI(I->getOpcode()) && !LastInstInFunction &&
+ IsDIVMULT(IInSlot->getOpcode())) {
+ TII->insertNop(*(I->getParent()), std::next(I), I->getDebugLoc())
+ ->bundleWithPred();
+ NumInsertedNops++;
+ }
}
+ } else if (IsMFLOMFHI(I->getOpcode())) {
+ hasPendingMFLO = true;
}
}
}
@@ -777,6 +791,18 @@ bool MipsBranchExpansion::handleSlot(Pred Predicate, Safe SafeInSlot) {
return Changed;
}
+bool MipsBranchExpansion::handleMFLO() {
+ // mips1-4 need at least 2 instructions between a mflo/mfhi and the next
+ // mult/div, so bail out when the subtarget has Mips32 or Mips5.
+ if (STI->hasMips32() || STI->hasMips5())
+ return false;
+ // Predicate selects mflo/mfhi; SafeAfterMflo reports mult/div opcodes as unsafe.
+ return handleSlot([this](auto &I) -> bool { return TII->IsMfloOrMfhi(I); },
+ [this](auto &IInSlot, auto &I) -> bool {
+ return TII->SafeAfterMflo(IInSlot);
+ });
+}
+
bool MipsBranchExpansion::handleForbiddenSlot() {
// Forbidden slot hazards are only defined for MIPSR6 but not microMIPSR6.
if (!STI->hasMips32r6() || STI->inMicroMipsMode())
@@ -893,16 +919,19 @@ bool MipsBranchExpansion::runOnMachineFunction(MachineFunction &MF) {
bool forbiddenSlotChanged = handleForbiddenSlot();
bool fpuDelaySlotChanged = handleFPUDelaySlot();
bool loadDelaySlotChanged = handleLoadDelaySlot();
+ bool MfloChanged = handleMFLO();
bool Changed = longBranchChanged || forbiddenSlotChanged ||
- fpuDelaySlotChanged || loadDelaySlotChanged;
+ fpuDelaySlotChanged || loadDelaySlotChanged || MfloChanged;
// Then run them alternatively while there are changes.
while (forbiddenSlotChanged) {
longBranchChanged = handlePossibleLongBranch();
fpuDelaySlotChanged = handleFPUDelaySlot();
loadDelaySlotChanged = handleLoadDelaySlot();
- if (!longBranchChanged && !fpuDelaySlotChanged && !loadDelaySlotChanged)
+ MfloChanged = handleMFLO();
+ if (!longBranchChanged && !fpuDelaySlotChanged && !loadDelaySlotChanged &&
+ !MfloChanged)
break;
forbiddenSlotChanged = handleForbiddenSlot();
}
diff --git a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index cb98c04ff4e50..e0401ba8d720e 100644
--- a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -743,6 +743,12 @@ bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin,
bool InMicroMipsMode = STI.inMicroMipsMode();
const MipsInstrInfo *TII = STI.getInstrInfo();
unsigned Opcode = (*Slot).getOpcode();
+
+ if ((CurrI->getOpcode() == Mips::MFLO ||
+ CurrI->getOpcode() == Mips::MFLO64) &&
+ (!STI.hasMips32() && !STI.hasMips5()))
+ continue;
+
// This is complicated by the tail call optimization. For non-PIC code
// there is only a 32bit sized unconditional branch which can be assumed
// to be able to reach the target. b16 only has a range of +/- 1 KB.
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index 392cc15d7943a..663be430a2c71 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -13,6 +13,7 @@
#include "MipsInstrInfo.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "Mips.h"
#include "MipsSubtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -571,6 +572,13 @@ unsigned MipsInstrInfo::getEquivalentCompactForm(
return 0;
}
+bool MipsInstrInfo::SafeAfterMflo(const MachineInstr &MI) const {
+ if (IsDIVMULT(MI.getOpcode()))
+ return false;
+ // Opcodes outside the IsDIVMULT list are reported as safe.
+ return true;
+}
+
/// Predicate for distingushing between control transfer instructions and all
/// other instructions for handling forbidden slots. Consider inline assembly
/// as unsafe as well.
@@ -623,6 +631,13 @@ bool MipsInstrInfo::SafeInLoadDelaySlot(const MachineInstr &MIInSlot,
});
}
+bool MipsInstrInfo::IsMfloOrMfhi(const MachineInstr &MI) const {
+ if (IsMFLOMFHI(MI.getOpcode()))
+ return true;
+ // Opcodes outside the IsMFLOMFHI list (MFLO, MFLO64, MFHI, MFHI64) yield false.
+ return false;
+}
+
/// Predicate for distingushing instructions that have forbidden slots.
bool MipsInstrInfo::HasForbiddenSlot(const MachineInstr &MI) const {
return (MI.getDesc().TSFlags & MipsII::HasForbiddenSlot) != 0;
diff --git a/llvm/lib/Target/Mips/MipsInstrInfo.h b/llvm/lib/Target/Mips/MipsInstrInfo.h
index dc4b9d99b39d2..bd6aed6f22665 100644
--- a/llvm/lib/Target/Mips/MipsInstrInfo.h
+++ b/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -89,6 +89,8 @@ class MipsInstrInfo : public MipsGenInstrInfo {
bool isBranchOffsetInRange(unsigned BranchOpc,
int64_t BrOffset) const override;
+ bool SafeAfterMflo(const MachineInstr &MI) const;
+
/// Predicate to determine if an instruction can go in a forbidden slot.
bool SafeInForbiddenSlot(const MachineInstr &MI) const;
@@ -100,6 +102,8 @@ class MipsInstrInfo : public MipsGenInstrInfo {
bool SafeInLoadDelaySlot(const MachineInstr &MIInSlot,
const MachineInstr &LoadMI) const;
+ bool IsMfloOrMfhi(const MachineInstr &MI) const;
+
/// Predicate to determine if an instruction has a forbidden slot.
bool HasForbiddenSlot(const MachineInstr &MI) const;
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/sdiv.ll b/llvm/test/CodeGen/Mips/llvm-ir/sdiv.ll
index af3d4f50f3fe4..8d548861f4393 100644
--- a/llvm/test/CodeGen/Mips/llvm-ir/sdiv.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/sdiv.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
-; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS2
; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \
@@ -13,9 +13,9 @@
; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6
; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
-; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS3
; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \
-; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS3
; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \
@@ -35,6 +35,11 @@
; RUN: FileCheck %s -check-prefix=MMR6
define signext i1 @sdiv_i1(i1 signext %a, i1 signext %b) {
+; MIPS2-LABEL: sdiv_i1:
+; MIPS2: # %bb.0: # %entry
+; MIPS2-NEXT: jr $ra
+; MIPS2-NEXT: move $2, $4
+;
; GP32-LABEL: sdiv_i1:
; GP32: # %bb.0: # %entry
; GP32-NEXT: jr $ra
@@ -45,6 +50,11 @@ define signext i1 @sdiv_i1(i1 signext %a, i1 signext %b) {
; GP32R6-NEXT: jr $ra
; GP32R6-NEXT: move $2, $4
;
+; MIPS3-LABEL: sdiv_i1:
+; MIPS3: # %bb.0: # %entry
+; MIPS3-NEXT: jr $ra
+; MIPS3-NEXT: move $2, $4
+;
; GP64-LABEL: sdiv_i1:
; GP64: # %bb.0: # %entry
; GP64-NEXT: jr $ra
@@ -70,6 +80,15 @@ entry:
}
define signext i8 @sdiv_i8(i8 signext %a, i8 signext %b) {
+; MIPS2-LABEL: sdiv_i8:
+; MIPS2: # %bb.0: # %entry
+; MIPS2-NEXT: div $zero, $4, $5
+; MIPS2-NEXT: teq $5, $zero, 7
+; MIPS2-NEXT: mflo $1
+; MIPS2-NEXT: sll $1, $1, 24
+; MIPS2-NEXT: jr $ra
+; MIPS2-NEXT: sra $2, $1, 24
+;
; GP32R0R2-LABEL: sdiv_i8:
; GP32R0R2: # %bb.0: # %entry
; GP32R0R2-NEXT: div $zero, $4, $5
@@ -94,6 +113,15 @@ define signext i8 @sdiv_i8(i8 signext %a, i8 signext %b) {
; GP32R6-NEXT: jr $ra
; GP32R6-NEXT: seb $2, $1
;
+; MIPS3-LABEL: sdiv_i8:
+; MIPS3: # %bb.0: # %entry
+; MIPS3-NEXT: div $zero, $4, $5
+; MIPS3-NEXT: teq $5, $zero, 7
+; MIPS3-NEXT: mflo $1
+; MIPS3-NEXT: sll $1, $1, 24
+; MIPS3-NEXT: jr $ra
+; MIPS3-NEXT: sra $2, $1, 24
+;
; GP64R0R1-LABEL: sdiv_i8:
; GP64R0R1: # %bb.0: # %entry
; GP64R0R1-NEXT: div $zero, $4, $5
@@ -138,6 +166,15 @@ entry:
}
define signext i16 @sdiv_i16(i16 signext %a, i16 signext %b) {
+; MIPS2-LABEL: sdiv_i16:
+; MIPS2: # %bb.0: # %entry
+; MIPS2-NEXT: div $zero, $4, $5
+; MIPS2-NEXT: teq $5, $zero, 7
+; MIPS2-NEXT: mflo $1
+; MIPS2-NEXT: sll $1, $1, 16
+; MIPS2-NEXT: jr $ra
+; MIPS2-NEXT: sra $2, $1, 16
+;
; GP32R0R2-LABEL: sdiv_i16:
; GP32R0R2: # %bb.0: # %entry
; GP32R0R2-NEXT: div $zero, $4, $5
@@ -162,6 +199,15 @@ define signext i16 @sdiv_i16(i16 signext %a, i16 signext %b) {
; GP32R6-NEXT: jr $ra
; GP32R6-NEXT: seh $2, $1
;
+; MIPS3-LABEL: sdiv_i16:
+; MIPS3: # %bb.0: # %entry
+; MIPS3-NEXT: div $zero, $4, $5
+; MIPS3-NEXT: teq $5, $zero, 7
+; MIPS3-NEXT: mflo $1
+; MIPS3-NEXT: sll $1, $1, 16
+; MIPS3-NEXT: jr $ra
+; MIPS3-NEXT: sra $2, $1, 16
+;
; GP64R0R1-LABEL: sdiv_i16:
; GP64R0R1: # %bb.0: # %entry
; GP64R0R1-NEXT: div $zero, $4, $5
@@ -206,6 +252,14 @@ entry:
}
define signext i32 @sdiv_i32(i32 signext %a, i32 signext %b) {
+; MIPS2-LABEL: sdiv_i32:
+; MIPS2: # %bb.0: # %entry
+; MIPS2-NEXT: div $zero, $4, $5
+; MIPS2-NEXT: teq $5, $zero, 7
+; MIPS2-NEXT: mflo $2
+; MIPS2-NEXT: jr $ra
+; MIPS2-NEXT: nop
+;
; GP32-LABEL: sdiv_i32:
; GP32: # %bb.0: # %entry
; GP32-NEXT: div $zero, $4, $5
@@ -219,6 +273,14 @@ define signext i32 @sdiv_i32(i32 signext %a, i32 signext %b) {
; GP32R6-NEXT: teq $5, $zero, 7
; GP32R6-NEXT: jrc $ra
;
+; MIPS3-LABEL: sdiv_i32:
+; MIPS3: # %bb.0: # %entry
+; MIPS3-NEXT: div $zero, $4, $5
+; MIPS3-NEXT: teq $5, $zero, 7
+; MIPS3-NEXT: mflo $2
+; MIPS3-NEXT: jr $ra
+; MIPS3-NEXT: nop
+;
; GP64-LABEL: sdiv_i32:
; GP64: # %bb.0: # %entry
; GP64-NEXT: div $zero, $4, $5
@@ -250,6 +312,22 @@ entry:
}
define signext i64 @sdiv_i64(i64 signext %a, i64 signext %b) {
+; MIPS2-LABEL: sdiv_i64:
+; MIPS2: # %bb.0: # %entry
+; MIPS2-NEXT: lui $2, %hi(_gp_disp)
+; MIPS2-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS2-NEXT: addiu $sp, $sp, -24
+; MIPS2-NEXT: .cfi_def_cfa_offset 24
+; MIPS2-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS2-NEXT: .cfi_offset 31, -4
+; MIPS2-NEXT: addu $gp, $2, $25
+; MIPS2-NEXT: lw $25, %call16(__divdi3)($gp)
+; MIPS2-NEXT: jalr $25
+; MIPS2-NEXT: nop
+; MIPS2-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS2-NEXT: jr $ra
+; MIPS2-NEXT: addiu $sp, $sp, 24
+;
; GP32-LABEL: sdiv_i64:
; GP32: # %bb.0: # %entry
; GP32-NEXT: lui $2, %hi(_gp_disp)
@@ -281,6 +359,14 @@ define signext i64 @sdiv_i64(i64 signext %a, i64 signext %b) {
; GP32R6-NEXT: jr $ra
; GP32R6-NEXT: addiu $sp, $sp, 24
;
+; MIPS3-LABEL: sdiv_i64:
+; MIPS3: # %bb.0: # %entry
+; MIPS3-NEXT: ddiv $zero, $4, $5
+; MIPS3-NEXT: teq $5, $zero, 7
+; MIPS3-NEXT: mflo $2
+; MIPS3-NEXT: jr $ra
+; MIPS3-NEXT: nop
+;
; GP64-LABEL: sdiv_i64:
; GP64: # %bb.0: # %entry
; GP64-NEXT: ddiv $zero, $4, $5
@@ -332,6 +418,30 @@ entry:
}
define signext i128 @sdiv_i128(i128 signext %a, i128 signext %b) {
+; MIPS2-LABEL: sdiv_i128:
+; MIPS2: # %bb.0: # %entry
+; MIPS2-NEXT: lui $2, %hi(_gp_disp)
+; MIPS2-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS2-NEXT: addiu $sp, $sp, -40
+; MIPS2-NEXT: .cfi_def_cfa_offset 40
+; MIPS2-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS2-NEXT: .cfi_offset 31, -4
+; MIPS2-NEXT: addu $gp, $2, $25
+; MIPS2-NEXT: lw $1, 60($sp)
+; MIPS2-NEXT: lw $2, 64($sp)
+; MIPS2-NEXT: lw $3, 68($sp)
+; MIPS2-NEXT: sw $3, 28($sp)
+; MIPS2-NEXT: sw $2, 24($sp)
+; MIPS2-NEXT: sw $1, 20($sp)
+; MIPS2-NEXT: lw $1, 56($sp)
+; MIPS2-NEXT: sw $1, 16($sp)
+; MIPS2-NEXT: lw $25, %call16(__divti3)($gp)
+; MIPS2-NEXT: jalr $25
+; MIPS2-NEXT: nop
+; MIPS2-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS2-NEXT: jr $ra
+; MIPS2-NEXT: addiu $sp, $sp, 40
+;
; GP32-LABEL: sdiv_i128:
; GP32: # %bb.0: # %entry
; GP32-NEXT: lui $2, %hi(_gp_disp)
@@ -379,6 +489,25 @@ define signext i128 @sdiv_i128(i128 signext %a, i128 signext %b) {
; GP32R6-NEXT: jr $ra
; GP32R6-NEXT: addiu $sp, $sp, 40
;
+; MIPS3-LABEL: sdiv_i128:
+; MIPS3: # %bb.0: # %entry
+; MIPS3-NEXT: daddiu $sp, $sp, -16
+; MIPS3-NEXT: .cfi_def_cfa_offset 16
+; MIPS3-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS3-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS3-NEXT: .cfi_offset 31, -8
+; MIPS3-NEXT: .cfi_offset 28, -16
+; MIPS3-NEXT: lui $1, %hi(%neg(%gp_rel(sdiv_i128)))
+; MIPS3-NEXT: daddu $1, $1, $25
+; MIPS3-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(sdiv_i128)))
+; MIPS3-NEXT: ld $25, %call16(__divti3)($gp)
+; MIPS3-NEXT: jalr $25
+; MIPS3-NEXT: nop
+; MIPS3-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS3-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS3-NEXT: jr $ra
+; MIPS3-NEXT: daddiu $sp, $sp, 16
+;
; GP64-LABEL: sdiv_i128:
; GP64: # %bb.0: # %entry
; GP64-NEXT: daddiu $sp, $sp, -16
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/two-consecutive-div.ll b/llvm/test/CodeGen/Mips/llvm-ir/two-consecutive-div.ll
new file mode 100644
index 0000000000000..779b12b34ee84
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/llvm-ir/two-consecutive-div.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS2
+; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS32
+
+; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS3
+; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS64
+
+define signext i32 @sdiv_i32(i32 signext %a, i32 signext %b, i32 signext %c) {
+; MIPS2-LABEL: sdiv_i32:
+; MIPS2: # %bb.0: # %entry
+; MIPS2-NEXT: div $zero, $4, $5
+; MIPS2-NEXT: teq $5, $zero, 7
+; MIPS2-NEXT: mflo $1
+; MIPS2-NEXT: nop
+; MIPS2-NEXT: nop
+; MIPS2-NEXT: div $zero, $1, $6
+; MIPS2-NEXT: teq $6, $zero, 7
+; MIPS2-NEXT: mflo $2
+; MIPS2-NEXT: jr $ra
+; MIPS2-NEXT: nop
+;
+; MIPS32-LABEL: sdiv_i32:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: div $zero, $4, $5
+; MIPS32-NEXT: teq $5, $zero, 7
+; MIPS32-NEXT: mflo $1
+; MIPS32-NEXT: div $zero, $1, $6
+; MIPS32-NEXT: teq $6, $zero, 7
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: mflo $2
+;
+entry:
+ %div = sdiv i32 %a, %b
+ %div1 = sdiv i32 %div, %c
+ ret i32 %div1
+}
+
+define signext i64 @sdiv_i64(i64 signext %a, i64 signext %b, i64 signext %c) {
+; MIPS3-LABEL: sdiv_i64:
+; MIPS3: # %bb.0: # %entry
+; MIPS3-NEXT: ddiv $zero, $4, $5
+; MIPS3-NEXT: teq $5, $zero, 7
+; MIPS3-NEXT: mflo $1
+; MIPS3-NEXT: nop
+; MIPS3-NEXT: nop
+; MIPS3-NEXT: ddiv $zero, $1, $6
+; MIPS3-NEXT: teq $6, $zero, 7
+; MIPS3-NEXT: mflo $2
+; MIPS3-NEXT: jr $ra
+; MIPS3-NEXT: nop
+;
+; MIPS64-LABEL: sdiv_i64:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: ddiv $zero, $4, $5
+; MIPS64-NEXT: teq $5, $zero, 7
+; MIPS64-NEXT: mflo $1
+; MIPS64-NEXT: ddiv $zero, $1, $6
+; MIPS64-NEXT: teq $6, $zero, 7
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: mflo $2
+;
+entry:
+ %div = sdiv i64 %a, %b
+ %div1 = sdiv i64 %div, %c
+ ret i64 %div1
+}
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/two-consecutive-mult.ll b/llvm/test/CodeGen/Mips/llvm-ir/two-consecutive-mult.ll
new file mode 100644
index 0000000000000..db2c660e9bc79
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/llvm-ir/two-consecutive-mult.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=mips -mcpu=mips2 -O3 -relocation-model=pic \
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS2
+; RUN: llc < %s -mtriple=mips -mcpu=mips32 -O3 -relocation-model=pic \
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS32
+
+; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -O3 -relocation-model=pic \
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS3
+; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -O3 -relocation-model=pic \
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS64
+
+define signext i32 @mult_i32(i32 signext %a, i32 signext %b, i32 signext %c) {
+; MIPS2-LABEL: mult_i32:
+; MIPS2: # %bb.0: # %entry
+; MIPS2-NEXT: mult $4, $5
+; MIPS2-NEXT: mflo $1
+; MIPS2-NEXT: nop
+; MIPS2-NEXT: nop
+; MIPS2-NEXT: mult $1, $6
+; MIPS2-NEXT: mflo $2
+; MIPS2-NEXT: jr $ra
+; MIPS2-NEXT: nop
+;
+; MIPS32-LABEL: mult_i32:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: mul $1, $4, $5
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: mul $2, $1, $6
+;
+entry:
+ %mul = mul nsw i32 %a, %b
+ %mul1 = mul nsw i32 %mul, %c
+ ret i32 %mul1
+}
+
+define signext i64 @mul_i64(i64 signext %a, i64 signext %b, i64 signext %c) {
+; MIPS3-LABEL: mul_i64:
+; MIPS3: # %bb.0: # %entry
+; MIPS3-NEXT: dmult $4, $5
+; MIPS3-NEXT: mflo $1
+; MIPS3-NEXT: nop
+; MIPS3-NEXT: nop
+; MIPS3-NEXT: dmult $1, $6
+; MIPS3-NEXT: mflo $2
+; MIPS3-NEXT: jr $ra
+; MIPS3-NEXT: nop
+;
+; MIPS64-LABEL: mul_i64:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: dmult $4, $5
+; MIPS64-NEXT: mflo $1
+; MIPS64-NEXT: dmult $1, $6
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: mflo $2
+;
+entry:
+ %mul = mul i64 %a, %b
+ %mul1 = mul i64 %mul, %c
+ ret i64 %mul1
+}
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/two-consecutive-udiv.ll b/llvm/test/CodeGen/Mips/llvm-ir/two-consecutive-udiv.ll
new file mode 100644
index 0000000000000..3f68957972908
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/llvm-ir/two-consecutive-udiv.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS2
+; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS32
+
+; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS3
+; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS64
+
+define signext i32 @udiv_i32(i32 signext %a, i32 signext %b, i32 signext %c) {
+; MIPS2-LABEL: udiv_i32:
+; MIPS2: # %bb.0: # %entry
+; MIPS2-NEXT: divu $zero, $4, $5
+; MIPS2-NEXT: teq $5, $zero, 7
+; MIPS2-NEXT: mflo $1
+; MIPS2-NEXT: nop
+; MIPS2-NEXT: nop
+; MIPS2-NEXT: divu $zero, $1, $6
+; MIPS2-NEXT: teq $6, $zero, 7
+; MIPS2-NEXT: mflo $2
+; MIPS2-NEXT: jr $ra
+; MIPS2-NEXT: nop
+;
+; MIPS32-LABEL: udiv_i32:
+; MIPS32: # %bb.0: # %entry
+; MIPS32-NEXT: divu $zero, $4, $5
+; MIPS32-NEXT: teq $5, $zero, 7
+; MIPS32-NEXT: mflo $1
+; MIPS32-NEXT: divu $zero, $1, $6
+; MIPS32-NEXT: teq $6, $zero, 7
+; MIPS32-NEXT: jr $ra
+; MIPS32-NEXT: mflo $2
+;
+entry:
+ %udiv = udiv i32 %a, %b
+ %udiv1 = udiv i32 %udiv, %c
+ ret i32 %udiv1
+}
+
+define signext i64 @udiv_i64(i64 signext %a, i64 signext %b, i64 signext %c) {
+; MIPS3-LABEL: udiv_i64:
+; MIPS3: # %bb.0: # %entry
+; MIPS3-NEXT: ddivu $zero, $4, $5
+; MIPS3-NEXT: teq $5, $zero, 7
+; MIPS3-NEXT: mflo $1
+; MIPS3-NEXT: nop
+; MIPS3-NEXT: nop
+; MIPS3-NEXT: ddivu $zero, $1, $6
+; MIPS3-NEXT: teq $6, $zero, 7
+; MIPS3-NEXT: mflo $2
+; MIPS3-NEXT: jr $ra
+; MIPS3-NEXT: nop
+;
+; MIPS64-LABEL: udiv_i64:
+; MIPS64: # %bb.0: # %entry
+; MIPS64-NEXT: ddivu $zero, $4, $5
+; MIPS64-NEXT: teq $5, $zero, 7
+; MIPS64-NEXT: mflo $1
+; MIPS64-NEXT: ddivu $zero, $1, $6
+; MIPS64-NEXT: teq $6, $zero, 7
+; MIPS64-NEXT: jr $ra
+; MIPS64-NEXT: mflo $2
+;
+entry:
+ %udiv = udiv i64 %a, %b
+ %udiv1 = udiv i64 %udiv, %c
+ ret i64 %udiv1
+}
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/udiv.ll b/llvm/test/CodeGen/Mips/llvm-ir/udiv.ll
index e3dd347e723bc..cc2c6614e69c8 100644
--- a/llvm/test/CodeGen/Mips/llvm-ir/udiv.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/udiv.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
-; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS2
; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32
; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \
@@ -13,9 +13,9 @@
; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6
; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
-; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS3
; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \
-; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64
+; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=MIPS3
; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
; RUN: -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64
; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \
@@ -35,6 +35,11 @@
; RUN: FileCheck %s -check-prefix=MMR6
define zeroext i1 @udiv_i1(i1 zeroext %a, i1 zeroext %b) {
+; MIPS2-LABEL: udiv_i1:
+; MIPS2: # %bb.0: # %entry
+; MIPS2-NEXT: jr $ra
+; MIPS2-NEXT: move $2, $4
+;
; GP32-LABEL: udiv_i1:
; GP32: # %bb.0: # %entry
; GP32-NEXT: jr $ra
@@ -45,6 +50,11 @@ define zeroext i1 @udiv_i1(i1 zeroext %a, i1 zeroext %b) {
; GP32R6-NEXT: jr $ra
; GP32R6-NEXT: move $2, $4
;
+; MIPS3-LABEL: udiv_i1:
+; MIPS3: # %bb.0: # %entry
+; MIPS3-NEXT: jr $ra
+; MIPS3-NEXT: move $2, $4
+;
; GP64-LABEL: udiv_i1:
; GP64: # %bb.0: # %entry
; GP64-NEXT: jr $ra
@@ -70,6 +80,14 @@ entry:
}
define zeroext i8 @udiv_i8(i8 zeroext %a, i8 zeroext %b) {
+; MIPS2-LABEL: udiv_i8:
+; MIPS2: # %bb.0: # %entry
+; MIPS2-NEXT: divu $zero, $4, $5
+; MIPS2-NEXT: teq $5, $zero, 7
+; MIPS2-NEXT: mflo $2
+; MIPS2-NEXT: jr $ra
+; MIPS2-NEXT: nop
+;
; GP32-LABEL: udiv_i8:
; GP32: # %bb.0: # %entry
; GP32-NEXT: divu $zero, $4, $5
@@ -83,6 +101,14 @@ define zeroext i8 @udiv_i8(i8 zeroext %a, i8 zeroext %b) {
; GP32R6-NEXT: teq $5, $zero, 7
; GP32R6-NEXT: jrc $ra
;
+; MIPS3-LABEL: udiv_i8:
+; MIPS3: # %bb.0: # %entry
+; MIPS3-NEXT: divu $zero, $4, $5
+; MIPS3-NEXT: teq $5, $zero, 7
+; MIPS3-NEXT: mflo $2
+; MIPS3-NEXT: jr $ra
+; MIPS3-NEXT: nop
+;
; GP64-LABEL: udiv_i8:
; GP64: # %bb.0: # %entry
; GP64-NEXT: divu $zero, $4, $5
@@ -114,6 +140,14 @@ entry:
}
define zeroext i16 @udiv_i16(i16 zeroext %a, i16 zeroext %b) {
+; MIPS2-LABEL: udiv_i16:
+; MIPS2: # %bb.0: # %entry
+; MIPS2-NEXT: divu $zero, $4, $5
+; MIPS2-NEXT: teq $5, $zero, 7
+; MIPS2-NEXT: mflo $2
+; MIPS2-NEXT: jr $ra
+; MIPS2-NEXT: nop
+;
; GP32-LABEL: udiv_i16:
; GP32: # %bb.0: # %entry
; GP32-NEXT: divu $zero, $4, $5
@@ -127,6 +161,14 @@ define zeroext i16 @udiv_i16(i16 zeroext %a, i16 zeroext %b) {
; GP32R6-NEXT: teq $5, $zero, 7
; GP32R6-NEXT: jrc $ra
;
+; MIPS3-LABEL: udiv_i16:
+; MIPS3: # %bb.0: # %entry
+; MIPS3-NEXT: divu $zero, $4, $5
+; MIPS3-NEXT: teq $5, $zero, 7
+; MIPS3-NEXT: mflo $2
+; MIPS3-NEXT: jr $ra
+; MIPS3-NEXT: nop
+;
; GP64-LABEL: udiv_i16:
; GP64: # %bb.0: # %entry
; GP64-NEXT: divu $zero, $4, $5
@@ -158,6 +200,14 @@ entry:
}
define signext i32 @udiv_i32(i32 signext %a, i32 signext %b) {
+; MIPS2-LABEL: udiv_i32:
+; MIPS2: # %bb.0: # %entry
+; MIPS2-NEXT: divu $zero, $4, $5
+; MIPS2-NEXT: teq $5, $zero, 7
+; MIPS2-NEXT: mflo $2
+; MIPS2-NEXT: jr $ra
+; MIPS2-NEXT: nop
+;
; GP32-LABEL: udiv_i32:
; GP32: # %bb.0: # %entry
; GP32-NEXT: divu $zero, $4, $5
@@ -171,6 +221,14 @@ define signext i32 @udiv_i32(i32 signext %a, i32 signext %b) {
; GP32R6-NEXT: teq $5, $zero, 7
; GP32R6-NEXT: jrc $ra
;
+; MIPS3-LABEL: udiv_i32:
+; MIPS3: # %bb.0: # %entry
+; MIPS3-NEXT: divu $zero, $4, $5
+; MIPS3-NEXT: teq $5, $zero, 7
+; MIPS3-NEXT: mflo $2
+; MIPS3-NEXT: jr $ra
+; MIPS3-NEXT: nop
+;
; GP64-LABEL: udiv_i32:
; GP64: # %bb.0: # %entry
; GP64-NEXT: divu $zero, $4, $5
@@ -202,6 +260,22 @@ entry:
}
define signext i64 @udiv_i64(i64 signext %a, i64 signext %b) {
+; MIPS2-LABEL: udiv_i64:
+; MIPS2: # %bb.0: # %entry
+; MIPS2-NEXT: lui $2, %hi(_gp_disp)
+; MIPS2-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS2-NEXT: addiu $sp, $sp, -24
+; MIPS2-NEXT: .cfi_def_cfa_offset 24
+; MIPS2-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill
+; MIPS2-NEXT: .cfi_offset 31, -4
+; MIPS2-NEXT: addu $gp, $2, $25
+; MIPS2-NEXT: lw $25, %call16(__udivdi3)($gp)
+; MIPS2-NEXT: jalr $25
+; MIPS2-NEXT: nop
+; MIPS2-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload
+; MIPS2-NEXT: jr $ra
+; MIPS2-NEXT: addiu $sp, $sp, 24
+;
; GP32-LABEL: udiv_i64:
; GP32: # %bb.0: # %entry
; GP32-NEXT: lui $2, %hi(_gp_disp)
@@ -233,6 +307,14 @@ define signext i64 @udiv_i64(i64 signext %a, i64 signext %b) {
; GP32R6-NEXT: jr $ra
; GP32R6-NEXT: addiu $sp, $sp, 24
;
+; MIPS3-LABEL: udiv_i64:
+; MIPS3: # %bb.0: # %entry
+; MIPS3-NEXT: ddivu $zero, $4, $5
+; MIPS3-NEXT: teq $5, $zero, 7
+; MIPS3-NEXT: mflo $2
+; MIPS3-NEXT: jr $ra
+; MIPS3-NEXT: nop
+;
; GP64-LABEL: udiv_i64:
; GP64: # %bb.0: # %entry
; GP64-NEXT: ddivu $zero, $4, $5
@@ -284,6 +366,30 @@ entry:
}
define signext i128 @udiv_i128(i128 signext %a, i128 signext %b) {
+; MIPS2-LABEL: udiv_i128:
+; MIPS2: # %bb.0: # %entry
+; MIPS2-NEXT: lui $2, %hi(_gp_disp)
+; MIPS2-NEXT: addiu $2, $2, %lo(_gp_disp)
+; MIPS2-NEXT: addiu $sp, $sp, -40
+; MIPS2-NEXT: .cfi_def_cfa_offset 40
+; MIPS2-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill
+; MIPS2-NEXT: .cfi_offset 31, -4
+; MIPS2-NEXT: addu $gp, $2, $25
+; MIPS2-NEXT: lw $1, 60($sp)
+; MIPS2-NEXT: lw $2, 64($sp)
+; MIPS2-NEXT: lw $3, 68($sp)
+; MIPS2-NEXT: sw $3, 28($sp)
+; MIPS2-NEXT: sw $2, 24($sp)
+; MIPS2-NEXT: sw $1, 20($sp)
+; MIPS2-NEXT: lw $1, 56($sp)
+; MIPS2-NEXT: sw $1, 16($sp)
+; MIPS2-NEXT: lw $25, %call16(__udivti3)($gp)
+; MIPS2-NEXT: jalr $25
+; MIPS2-NEXT: nop
+; MIPS2-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload
+; MIPS2-NEXT: jr $ra
+; MIPS2-NEXT: addiu $sp, $sp, 40
+;
; GP32-LABEL: udiv_i128:
; GP32: # %bb.0: # %entry
; GP32-NEXT: lui $2, %hi(_gp_disp)
@@ -331,6 +437,25 @@ define signext i128 @udiv_i128(i128 signext %a, i128 signext %b) {
; GP32R6-NEXT: jr $ra
; GP32R6-NEXT: addiu $sp, $sp, 40
;
+; MIPS3-LABEL: udiv_i128:
+; MIPS3: # %bb.0: # %entry
+; MIPS3-NEXT: daddiu $sp, $sp, -16
+; MIPS3-NEXT: .cfi_def_cfa_offset 16
+; MIPS3-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill
+; MIPS3-NEXT: sd $gp, 0($sp) # 8-byte Folded Spill
+; MIPS3-NEXT: .cfi_offset 31, -8
+; MIPS3-NEXT: .cfi_offset 28, -16
+; MIPS3-NEXT: lui $1, %hi(%neg(%gp_rel(udiv_i128)))
+; MIPS3-NEXT: daddu $1, $1, $25
+; MIPS3-NEXT: daddiu $gp, $1, %lo(%neg(%gp_rel(udiv_i128)))
+; MIPS3-NEXT: ld $25, %call16(__udivti3)($gp)
+; MIPS3-NEXT: jalr $25
+; MIPS3-NEXT: nop
+; MIPS3-NEXT: ld $gp, 0($sp) # 8-byte Folded Reload
+; MIPS3-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload
+; MIPS3-NEXT: jr $ra
+; MIPS3-NEXT: daddiu $sp, $sp, 16
+;
; GP64-LABEL: udiv_i128:
; GP64: # %bb.0: # %entry
; GP64-NEXT: daddiu $sp, $sp, -16
More information about the llvm-commits
mailing list