[lld] [llvm] Reland "[sancov][LoongArch] Resolve pcaddu18i+jirl in evaluateBranch and teach sancov (#155371)" (PR #155964)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 28 20:04:14 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lld

Author: ZhaoQi (zhaoqi5)

<details>
<summary>Changes</summary>

Reland 9c994f5 after fixing ubsan bots failures.

This commit overrides `updateState` and `resetState` hooks in
`MCInstrAnalysis` in order to be able to analyze pcaddu18i+jirl pairs
inside `evaluateBranch`.

After this commit, `llvm-objdump` is able to correctly analyze and print
detailed information. `lld/test/ELF/loongarch-call36.s` shows the
changes.

Besides, this commit also teaches sancov to resolve such call sequences.
Without this commit, some tests in compiler-rt failed:

```
Failed Tests :
  SanitizerCommon-asan-loongarch64-Linux :: sanitizer_coverage_trace_pc_guard-dso.cpp
  SanitizerCommon-asan-loongarch64-Linux :: sanitizer_coverage_trace_pc_guard.cpp
  SanitizerCommon-lsan-loongarch64-Linux :: sanitizer_coverage_trace_pc_guard-dso.cpp
  SanitizerCommon-lsan-loongarch64-Linux :: sanitizer_coverage_trace_pc_guard.cpp
  SanitizerCommon-msan-loongarch64-Linux :: sanitizer_coverage_trace_pc_guard-dso.cpp
  SanitizerCommon-msan-loongarch64-Linux :: sanitizer_coverage_trace_pc_guard.cpp
```

The reason is that sancov could not resolve pcaddu18i+jirl call sequence
correctly and caused mismatches between coverage points in the binary
and the .sancov file:

```
ERROR: Coverage points in binary and .sancov file do not match.
```

NOTE: A similar issue might also occur on RISC-V when relaxation is
disabled (not verified). This commit can also fix for it.

---
Full diff: https://github.com/llvm/llvm-project/pull/155964.diff


3 Files Affected:

- (modified) lld/test/ELF/loongarch-call36.s (+3-3) 
- (modified) llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp (+80) 
- (modified) llvm/tools/sancov/sancov.cpp (+6-1) 


``````````diff
diff --git a/lld/test/ELF/loongarch-call36.s b/lld/test/ELF/loongarch-call36.s
index b593fdf1f6045..5cc0f2f3827c3 100644
--- a/lld/test/ELF/loongarch-call36.s
+++ b/lld/test/ELF/loongarch-call36.s
@@ -8,14 +8,14 @@
 ## hi20 = target - pc + (1 << 17) >> 18 = 0x60020 - 0x20010 + 0x20000 >> 18 = 1
 ## lo18 = target - pc & (1 << 18) - 1 = 0x60020 - 0x20010 & 0x3ffff = 16
 # EXE1:      20010: pcaddu18i $t0, 1
-# EXE1-NEXT: 20014: jirl $zero, $t0, 16
+# EXE1-NEXT: 20014: jirl $zero, $t0, 16 <foo>
 
 # RUN: ld.lld %t/a.o --section-start=.text=0x20010 --section-start=.sec.foo=0x40020 -o %t/exe2
 # RUN: llvm-objdump --no-show-raw-insn -d %t/exe2 | FileCheck --match-full-lines %s --check-prefix=EXE2
 ## hi20 = target - pc + (1 << 17) >> 18 = 0x40020 - 0x20010 + 0x20000 >> 18 = 1
 ## lo18 = target - pc & (1 << 18) - 1 = 0x40020 - 0x20010 & 0x3ffff = -131056
 # EXE2:      20010: pcaddu18i $t0, 1
-# EXE2-NEXT: 20014: jirl $zero, $t0, -131056
+# EXE2-NEXT: 20014: jirl $zero, $t0, -131056 <foo>
 
 # RUN: ld.lld %t/a.o -shared -T %t/a.t -o %t/a.so
 # RUN: llvm-readelf -x .got.plt %t/a.so | FileCheck --check-prefix=GOTPLT %s
@@ -34,7 +34,7 @@
 ## hi20 = foo at plt - pc + (1 << 17) >> 18 = 0x1234520 - 0x1274670 + 0x20000 >> 18 = -1
 ## lo18 = foo at plt - pc & (1 << 18) - 1 = 0x1234520 - 0x1274670 & 0x3ffff = -336
 # SO-NEXT: pcaddu18i $t0, -1{{$}}
-# SO-NEXT: jirl $zero, $t0, -336{{$}}
+# SO-NEXT: jirl $zero, $t0, -336 <.plt+0x20>{{$}}
 
 # GOTPLT:      section '.got.plt':
 # GOTPLT-NEXT: 0x01274730 00000000 00000000 00000000 00000000
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
index 35277ce094a7d..e5bd1c91edec9 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
@@ -26,6 +26,7 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/Compiler.h"
+#include <bitset>
 
 #define GET_INSTRINFO_MC_DESC
 #define ENABLE_INSTR_PREDICATE_VERIFIER
@@ -95,10 +96,81 @@ createLoongArchAsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS,
 namespace {
 
 class LoongArchMCInstrAnalysis : public MCInstrAnalysis {
+  int64_t GPRState[31] = {};
+  std::bitset<31> GPRValidMask;
+
+  static bool isGPR(MCRegister Reg) {
+    return Reg >= LoongArch::R0 && Reg <= LoongArch::R31;
+  }
+
+  static unsigned getRegIndex(MCRegister Reg) {
+    assert(isGPR(Reg) && Reg != LoongArch::R0 && "Invalid GPR reg");
+    return Reg - LoongArch::R1;
+  }
+
+  void setGPRState(MCRegister Reg, std::optional<int64_t> Value) {
+    if (Reg == LoongArch::R0)
+      return;
+
+    auto Index = getRegIndex(Reg);
+
+    if (Value) {
+      GPRState[Index] = *Value;
+      GPRValidMask.set(Index);
+    } else {
+      GPRValidMask.reset(Index);
+    }
+  }
+
+  std::optional<int64_t> getGPRState(MCRegister Reg) const {
+    if (Reg == LoongArch::R0)
+      return 0;
+
+    auto Index = getRegIndex(Reg);
+
+    if (GPRValidMask.test(Index))
+      return GPRState[Index];
+    return std::nullopt;
+  }
+
 public:
   explicit LoongArchMCInstrAnalysis(const MCInstrInfo *Info)
       : MCInstrAnalysis(Info) {}
 
+  void resetState() override { GPRValidMask.reset(); }
+
+  void updateState(const MCInst &Inst, uint64_t Addr) override {
+    // Terminators mark the end of a basic block which means the sequentially
+    // next instruction will be the first of another basic block and the current
+    // state will typically not be valid anymore. For calls, we assume all
+    // registers may be clobbered by the callee (TODO: should we take the
+    // calling convention into account?).
+    if (isTerminator(Inst) || isCall(Inst)) {
+      resetState();
+      return;
+    }
+
+    switch (Inst.getOpcode()) {
+    default: {
+      // Clear the state of all defined registers for instructions that we don't
+      // explicitly support.
+      auto NumDefs = Info->get(Inst.getOpcode()).getNumDefs();
+      for (unsigned I = 0; I < NumDefs; ++I) {
+        auto DefReg = Inst.getOperand(I).getReg();
+        if (isGPR(DefReg))
+          setGPRState(DefReg, std::nullopt);
+      }
+      break;
+    }
+    case LoongArch::PCADDU18I:
+      setGPRState(
+          Inst.getOperand(0).getReg(),
+          Addr + SignExtend64<38>(
+                     static_cast<uint64_t>(Inst.getOperand(1).getImm()) << 18));
+      break;
+    }
+  }
+
   bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
                       uint64_t &Target) const override {
     unsigned NumOps = Inst.getNumOperands();
@@ -108,6 +180,14 @@ class LoongArchMCInstrAnalysis : public MCInstrAnalysis {
       return true;
     }
 
+    if (Inst.getOpcode() == LoongArch::JIRL) {
+      if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
+        Target = *TargetRegState + Inst.getOperand(2).getImm();
+        return true;
+      }
+      return false;
+    }
+
     return false;
   }
 
diff --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp
index aebb5effd0be7..38893cf974a10 100644
--- a/llvm/tools/sancov/sancov.cpp
+++ b/llvm/tools/sancov/sancov.cpp
@@ -730,7 +730,7 @@ static void getObjectCoveragePoints(const object::ObjectFile &O,
   std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
   failIfEmpty(MII, "no instruction info for target " + TripleName);
 
-  std::unique_ptr<const MCInstrAnalysis> MIA(
+  std::unique_ptr<MCInstrAnalysis> MIA(
       TheTarget->createMCInstrAnalysis(MII.get()));
   failIfEmpty(MIA, "no instruction analysis info for target " + TripleName);
 
@@ -750,6 +750,9 @@ static void getObjectCoveragePoints(const object::ObjectFile &O,
     failIfError(BytesStr);
     ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*BytesStr);
 
+    if (MIA)
+      MIA->resetState();
+
     for (uint64_t Index = 0, Size = 0; Index < Section.getSize();
          Index += Size) {
       MCInst Inst;
@@ -760,6 +763,7 @@ static void getObjectCoveragePoints(const object::ObjectFile &O,
           Size = std::min<uint64_t>(
               ThisBytes.size(),
               DisAsm->suggestBytesToSkip(ThisBytes, ThisAddr));
+        MIA->resetState();
         continue;
       }
       uint64_t Addr = Index + SectionAddr;
@@ -770,6 +774,7 @@ static void getObjectCoveragePoints(const object::ObjectFile &O,
           MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target) &&
           SanCovAddrs.find(Target) != SanCovAddrs.end())
         Addrs->insert(CovPoint);
+      MIA->updateState(Inst, Addr);
     }
   }
 }

``````````

</details>


https://github.com/llvm/llvm-project/pull/155964


More information about the llvm-commits mailing list