[clang] 2edcde0 - [MIPS] Add -mfix4300 flag to enable vr4300 mulmul bugfix pass

Simon Atanasyan via cfe-commits cfe-commits at lists.llvm.org
Fri Dec 31 05:01:13 PST 2021


Author: Random
Date: 2021-12-31T15:59:44+03:00
New Revision: 2edcde00cb396cc17d8d8b171a6ebaa97fd30e59

URL: https://github.com/llvm/llvm-project/commit/2edcde00cb396cc17d8d8b171a6ebaa97fd30e59
DIFF: https://github.com/llvm/llvm-project/commit/2edcde00cb396cc17d8d8b171a6ebaa97fd30e59.diff

LOG: [MIPS] Add -mfix4300 flag to enable vr4300 mulmul bugfix pass

Early revisions of the VR4300 have a hardware bug where two consecutive
multiplications can produce an incorrect result in the second multiply.
This revision adds the `-mfix4300` flag to llvm (and clang) which, when
passed, provides a software fix for this issue.

More precise description of the "mulmul" bug:
```
mul.[s,d] fd,fs,ft
mul.[s,d] fd,fs,ft  or  [D]MULT[U] rs,rt
```

When the above sequence is executed by the CPU, if at least one of the
source operands of the first mul instruction happens to be `sNaN`, `0`
or `Infinity`, then the second mul instruction may produce an incorrect
result. This can happen both if the two mul instructions are next to each
other and if the first one is in a delay slot and the second is the first
instruction of the branch target.

Description of the fix:
This fix adds a backend pass to llvm which scans for mul instructions in
each basic block and inserts a nop whenever the following conditions are
met:

 - The current instruction is a single or double-precision floating-point
   mul instruction.
 - The next instruction is either a mul instruction (any kind) or a branch
   instruction.

Differential Revision: https://reviews.llvm.org/D116238

Added: 
    llvm/lib/Target/Mips/MipsMulMulBugPass.cpp
    llvm/test/CodeGen/Mips/vr4300-mulbranch.ll
    llvm/test/CodeGen/Mips/vr4300-mulmul.ll

Modified: 
    clang/include/clang/Driver/Options.td
    clang/lib/Driver/ToolChains/Clang.cpp
    llvm/lib/Target/Mips/CMakeLists.txt
    llvm/lib/Target/Mips/Mips.h
    llvm/lib/Target/Mips/MipsTargetMachine.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index dc8bd831f2a26..6c56d9739de2a 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3617,6 +3617,7 @@ def mcheck_zero_division : Flag<["-"], "mcheck-zero-division">,
                            Group<m_mips_Features_Group>;
 def mno_check_zero_division : Flag<["-"], "mno-check-zero-division">,
                               Group<m_mips_Features_Group>;
+def mfix4300 : Flag<["-"], "mfix4300">, Group<m_mips_Features_Group>;
 def mcompact_branches_EQ : Joined<["-"], "mcompact-branches=">,
                            Group<m_mips_Features_Group>;
 def mbranch_likely : Flag<["-"], "mbranch-likely">, Group<m_Group>,

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 2c34392150938..3a4e9153689e2 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1929,6 +1929,11 @@ void Clang::AddMIPSTargetArgs(const ArgList &Args,
     }
   }
 
+  if (Arg *A = Args.getLastArg(options::OPT_mfix4300)) {
+    CmdArgs.push_back("-mllvm");
+    CmdArgs.push_back("-mfix4300");
+  }
+
   if (Arg *A = Args.getLastArg(options::OPT_G)) {
     StringRef v = A->getValue();
     CmdArgs.push_back("-mllvm");

diff  --git a/llvm/lib/Target/Mips/CMakeLists.txt b/llvm/lib/Target/Mips/CMakeLists.txt
index cbfd187fdfa21..5759fd9736e78 100644
--- a/llvm/lib/Target/Mips/CMakeLists.txt
+++ b/llvm/lib/Target/Mips/CMakeLists.txt
@@ -59,6 +59,7 @@ add_llvm_target(MipsCodeGen
   MipsTargetMachine.cpp
   MipsTargetObjectFile.cpp
   MicroMipsSizeReduction.cpp
+  MipsMulMulBugPass.cpp
 
   LINK_COMPONENTS
   Analysis

diff  --git a/llvm/lib/Target/Mips/Mips.h b/llvm/lib/Target/Mips/Mips.h
index b3faaab436f01..faf58545db626 100644
--- a/llvm/lib/Target/Mips/Mips.h
+++ b/llvm/lib/Target/Mips/Mips.h
@@ -38,6 +38,7 @@ namespace llvm {
   FunctionPass *createMicroMipsSizeReducePass();
   FunctionPass *createMipsExpandPseudoPass();
   FunctionPass *createMipsPreLegalizeCombiner();
+  FunctionPass *createMipsMulMulBugPass();
 
   InstructionSelector *createMipsInstructionSelector(const MipsTargetMachine &,
                                                      MipsSubtarget &,
@@ -47,6 +48,7 @@ namespace llvm {
   void initializeMipsBranchExpansionPass(PassRegistry &);
   void initializeMicroMipsSizeReducePass(PassRegistry &);
   void initializeMipsPreLegalizerCombinerPass(PassRegistry&);
+  void initializeMipsMulMulBugFixPass(PassRegistry&);
 } // end namespace llvm;
 
 #endif

diff  --git a/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp b/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp
new file mode 100644
index 0000000000000..cb112ca1dfffe
--- /dev/null
+++ b/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp
@@ -0,0 +1,134 @@
+//===- MipsMulMulBugPass.cpp - Mips VR4300 mulmul bugfix pass -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Early revisions of the VR4300 have a hardware bug where two consecutive
+// multiplications can produce an incorrect result in the second multiply.
+//
+// This pass scans for mul instructions in each basic block and inserts
+// a nop whenever the following conditions are met:
+//
+// - The current instruction is a single or double-precision floating-point
+//   mul instruction.
+// - The next instruction is either a mul instruction (any kind)
+//   or a branch instruction.
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "mips-vr4300-mulmul-fix"
+
+using namespace llvm;
+
+namespace {
+
+class MipsMulMulBugFix : public MachineFunctionPass {
+public:
+  MipsMulMulBugFix() : MachineFunctionPass(ID) {
+    initializeMipsMulMulBugFixPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "Mips VR4300 mulmul bugfix"; }
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  static char ID;
+
+private:
+  bool fixMulMulBB(MachineBasicBlock &MBB, const MipsInstrInfo &MipsII);
+};
+
+} // namespace
+
+INITIALIZE_PASS(MipsMulMulBugFix, "mips-vr4300-mulmul-fix",
+                "Mips VR4300 mulmul bugfix", false, false)
+
+char MipsMulMulBugFix::ID = 0;
+
+bool MipsMulMulBugFix::runOnMachineFunction(MachineFunction &MF) {
+  const MipsInstrInfo &MipsII =
+      *static_cast<const MipsInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  bool Modified = false;
+
+  for (auto &MBB : MF)
+    Modified |= fixMulMulBB(MBB, MipsII);
+
+  return Modified;
+}
+
+static bool isFirstMul(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case Mips::FMUL_S:
+  case Mips::FMUL_D:
+  case Mips::FMUL_D32:
+  case Mips::FMUL_D64:
+    return true;
+  default:
+    return false;
+  }
+}
+
+static bool isSecondMulOrBranch(const MachineInstr &MI) {
+  if (MI.isBranch() || MI.isIndirectBranch() || MI.isCall())
+    return true;
+
+  switch (MI.getOpcode()) {
+  case Mips::MUL:
+  case Mips::FMUL_S:
+  case Mips::FMUL_D:
+  case Mips::FMUL_D32:
+  case Mips::FMUL_D64:
+  case Mips::MULT:
+  case Mips::MULTu:
+  case Mips::DMULT:
+  case Mips::DMULTu:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool MipsMulMulBugFix::fixMulMulBB(MachineBasicBlock &MBB,
+                                   const MipsInstrInfo &MipsII) {
+  bool Modified = false;
+
+  // Iterate through the instructions in the basic block
+  for (MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
+                                         E = MBB.instr_end();
+       MII != E; ++MII) {
+
+    MachineBasicBlock::instr_iterator NextMII = std::next(MII);
+
+    // Trigger when the current instruction is a mul and the next instruction
+    // is either a mul or a branch in case the branch target start with a mul
+    if (NextMII != E && isFirstMul(*MII) && isSecondMulOrBranch(*NextMII)) {
+      LLVM_DEBUG(dbgs() << "Found mulmul!");
+
+      const MCInstrDesc &NewMCID = MipsII.get(Mips::NOP);
+      BuildMI(MBB, NextMII, DebugLoc(), NewMCID);
+      Modified = true;
+    }
+  }
+
+  return Modified;
+}
+
+FunctionPass *llvm::createMipsMulMulBugPass() { return new MipsMulMulBugFix(); }

diff  --git a/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 8de3c9fd25bdc..f9f662a00117e 100644
--- a/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -45,6 +45,10 @@ using namespace llvm;
 
 #define DEBUG_TYPE "mips"
 
+static cl::opt<bool>
+    EnableMulMulFix("mfix4300", cl::init(false),
+                    cl::desc("Enable the VR4300 mulmul bug fix."), cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsTarget() {
   // Register the target.
   RegisterTargetMachine<MipsebTargetMachine> X(getTheMipsTarget());
@@ -58,6 +62,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMipsTarget() {
   initializeMipsBranchExpansionPass(*PR);
   initializeMicroMipsSizeReducePass(*PR);
   initializeMipsPreLegalizerCombinerPass(*PR);
+  initializeMipsMulMulBugFixPass(*PR);
 }
 
 static std::string computeDataLayout(const Triple &TT, StringRef CPU,
@@ -292,6 +297,11 @@ void MipsPassConfig::addPreEmitPass() {
   // instructions which can be remapped to a 16 bit instruction.
   addPass(createMicroMipsSizeReducePass());
 
+  // This pass inserts a nop instruction between two back-to-back multiplication
+  // instructions when the "mfix4300" flag is passed.
+  if (EnableMulMulFix)
+    addPass(createMipsMulMulBugPass());
+
   // The delay slot filler pass can potientially create forbidden slot hazards
   // for MIPSR6 and therefore it should go before MipsBranchExpansion pass.
   addPass(createMipsDelaySlotFillerPass());

diff  --git a/llvm/test/CodeGen/Mips/vr4300-mulbranch.ll b/llvm/test/CodeGen/Mips/vr4300-mulbranch.ll
new file mode 100644
index 0000000000000..c3f15fb6afa66
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/vr4300-mulbranch.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=mips -mfix4300 -verify-machineinstrs < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define dso_local void @fun_s(float %a) local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: fun_s
+; CHECK: mul.s
+; CHECK-NEXT: nop
+  %mul = fmul float %a, %a
+  tail call void @foo_s(float %mul) #2
+  ret void
+}
+
+declare dso_local void @foo_s(float) local_unnamed_addr #1
+
+; Function Attrs: nounwind
+define dso_local void @fun_d(double %a) local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: fun_d
+; CHECK: mul.d
+; CHECK-NEXT: nop
+  %mul = fmul double %a, %a
+  tail call void @foo_d(double %mul) #2
+  ret void
+}
+
+declare dso_local void @foo_d(double) local_unnamed_addr #1

diff  --git a/llvm/test/CodeGen/Mips/vr4300-mulmul.ll b/llvm/test/CodeGen/Mips/vr4300-mulmul.ll
new file mode 100644
index 0000000000000..f20cc169825ee
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/vr4300-mulmul.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=mips -mfix4300 -verify-machineinstrs < %s | FileCheck %s
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
+define dso_local float @fun_s(float %x) local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: fun_s
+; CHECK: mul.s
+; CHECK-NEXT: nop
+; CHECK: mul.s
+  %mul = fmul float %x, %x
+  %mul1 = fmul float %mul, %x
+  ret float %mul1
+}
+
+define dso_local double @fun_d(double %x) local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: fun_d
+; CHECK: mul.d
+; CHECK-NEXT: nop
+; CHECK: mul.d
+  %mul = fmul double %x, %x
+  %mul1 = fmul double %mul, %x
+  ret double %mul1
+}


        


More information about the cfe-commits mailing list