[llvm] 00255f4 - [AMDGPU] Fix access beyond the end of the basic block in execMayBeModifiedBeforeAnyUse.

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 23 09:19:39 PDT 2020


Author: vpykhtin
Date: 2020-10-23T19:17:48+03:00
New Revision: 00255f419298b93c812324a257ff057e57ab5c04

URL: https://github.com/llvm/llvm-project/commit/00255f419298b93c812324a257ff057e57ab5c04
DIFF: https://github.com/llvm/llvm-project/commit/00255f419298b93c812324a257ff057e57ab5c04.diff

LOG: [AMDGPU] Fix access beyond the end of the basic block in execMayBeModifiedBeforeAnyUse.

I was wrong in thinking that MRI.use_instructions return unique instructions and mislead Jay in his previous patch D64393.

First loop counted more instructions than it was in reality and the second loop went beyond the basic block with that counter.

I used Jay's previous code that relied on MRI.use_operands to constrain the number of instructions to check among.
modifiesRegister is inlined to reduce the number of passes over instruction operands and added assert on BB end boundary.

Differential Revision: https://reviews.llvm.org/D89386

Added: 
    llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/unittests/Target/AMDGPU/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e62a0d985e62..db6b8ec44019 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7152,36 +7152,51 @@ bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
   auto *TRI = MRI.getTargetRegisterInfo();
   auto *DefBB = DefMI.getParent();
 
-  const int MaxUseInstScan = 10;
-  int NumUseInst = 0;
+  const int MaxUseScan = 10;
+  int NumUse = 0;
 
-  for (auto &UseInst : MRI.use_nodbg_instructions(VReg)) {
+  for (auto &Use : MRI.use_nodbg_operands(VReg)) {
+    auto &UseInst = *Use.getParent();
     // Don't bother searching between blocks, although it is possible this block
     // doesn't modify exec.
     if (UseInst.getParent() != DefBB)
       return true;
 
-    if (++NumUseInst > MaxUseInstScan)
+    if (++NumUse > MaxUseScan)
       return true;
   }
 
+  if (NumUse == 0)
+    return false;
+
   const int MaxInstScan = 20;
   int NumInst = 0;
 
   // Stop scan when we have seen all the uses.
   for (auto I = std::next(DefMI.getIterator()); ; ++I) {
+    assert(I != DefBB->end());
+
     if (I->isDebugInstr())
       continue;
 
     if (++NumInst > MaxInstScan)
       return true;
 
-    if (I->readsRegister(VReg))
-      if (--NumUseInst == 0)
-        return false;
+    for (const MachineOperand &Op : I->operands()) {
+      // We don't check reg masks here as they're used only on calls:
+      // 1. EXEC is only considered const within one BB
+      // 2. Call should be a terminator instruction if present in a BB
 
-    if (I->modifiesRegister(AMDGPU::EXEC, TRI))
-      return true;
+      if (!Op.isReg())
+        continue;
+
+      Register Reg = Op.getReg();
+      if (Op.isUse()) {
+        if (Reg == VReg && --NumUse == 0)
+          return false;
+      } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
+        return true;
+    }
   }
 }
 

diff  --git a/llvm/unittests/Target/AMDGPU/CMakeLists.txt b/llvm/unittests/Target/AMDGPU/CMakeLists.txt
index 7b4548a0b678..1d547f5c2ce0 100644
--- a/llvm/unittests/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/unittests/Target/AMDGPU/CMakeLists.txt
@@ -13,4 +13,5 @@ set(LLVM_LINK_COMPONENTS
 
 add_llvm_target_unittest(AMDGPUTests
   DwarfRegMappings.cpp
+  ExecMayBeModifiedBeforeAnyUse.cpp
   )

diff  --git a/llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp b/llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp
new file mode 100755
index 000000000000..d93fdba9450b
--- /dev/null
+++ b/llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp
@@ -0,0 +1,72 @@
+//===- llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "gtest/gtest.h"
+#include <thread>
+
+using namespace llvm;
+
+// implementation is in the llvm/unittests/Target/AMDGPU/DwarfRegMappings.cpp
+std::unique_ptr<const GCNTargetMachine>
+createTargetMachine(std::string TStr, StringRef CPU, StringRef FS);
+
+TEST(AMDGPUExecMayBeModifiedBeforeAnyUse, TheTest) {
+  auto TM = createTargetMachine("amdgcn-amd-", "gfx906", "");
+  if (!TM)
+    return;
+
+  GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
+                  std::string(TM->getTargetFeatureString()), *TM);
+
+  LLVMContext Ctx;
+  Module Mod("Module", Ctx);
+  Mod.setDataLayout(TM->createDataLayout());
+
+  auto *Type = FunctionType::get(Type::getVoidTy(Ctx), false);
+  auto *F = Function::Create(Type, GlobalValue::ExternalLinkage, "Test", &Mod);
+
+  MachineModuleInfo MMI(TM.get());
+  auto MF = std::make_unique<MachineFunction>(*F, *TM, ST, 42, MMI);
+  auto *BB = MF->CreateMachineBasicBlock();
+  MF->push_back(BB);
+
+  auto E = BB->end();
+  DebugLoc DL;
+  const auto &TII = *ST.getInstrInfo();
+  auto &MRI = MF->getRegInfo();
+
+  // create machine IR
+  Register R = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+  MachineInstr *DefMI =
+      BuildMI(*BB, E, DL, TII.get(AMDGPU::S_MOV_B32), R).addImm(42).getInstr();
+
+  auto First =
+      BuildMI(*BB, E, DL, TII.get(AMDGPU::S_NOP)).addReg(R, RegState::Implicit);
+
+  BuildMI(*BB, E, DL, TII.get(AMDGPU::S_NOP)).addReg(R, RegState::Implicit);
+
+  // this violates the continuous sequence of R's uses for the first S_NOP
+  First.addReg(R, RegState::Implicit);
+
+#ifdef DEBUG_THIS_TEST
+  MF->dump();
+  MRI.dumpUses(R);
+#endif
+
+  // make sure execMayBeModifiedBeforeAnyUse doesn't crash
+  ASSERT_FALSE(execMayBeModifiedBeforeAnyUse(MRI, R, *DefMI));
+}


        


More information about the llvm-commits mailing list