[lld] [llvm] Reland "[sancov][LoongArch] Resolve pcaddu18i+jirl in evaluateBranch and teach sancov (#155371)" (PR #155964)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 28 20:04:14 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lld
Author: ZhaoQi (zhaoqi5)
<details>
<summary>Changes</summary>
Reland 9c994f5 after fixing ubsan bots failures.
This commit overrides `updateState` and `resetState` hooks in
`MCInstrAnalysis` in order to be able to analyze pcaddu18i+jirl pairs
inside `evaluateBranch`.
After this commit, `llvm-objdump` is able to correctly analyze and print
detailed information. `lld/test/ELF/loongarch-call36.s` shows the
changes.
Besides, this commit also teaches sancov to resolve such call sequences.
Without this commit, some tests in compiler-rt failed:
```
Failed Tests :
SanitizerCommon-asan-loongarch64-Linux :: sanitizer_coverage_trace_pc_guard-dso.cpp
SanitizerCommon-asan-loongarch64-Linux :: sanitizer_coverage_trace_pc_guard.cpp
SanitizerCommon-lsan-loongarch64-Linux :: sanitizer_coverage_trace_pc_guard-dso.cpp
SanitizerCommon-lsan-loongarch64-Linux :: sanitizer_coverage_trace_pc_guard.cpp
SanitizerCommon-msan-loongarch64-Linux :: sanitizer_coverage_trace_pc_guard-dso.cpp
SanitizerCommon-msan-loongarch64-Linux :: sanitizer_coverage_trace_pc_guard.cpp
```
The reason is that sancov could not resolve pcaddu18i+jirl call sequence
correctly and caused mismatches between coverage points in the binary
and the .sancov file:
```
ERROR: Coverage points in binary and .sancov file do not match.
```
NOTE: A similar issue might also occur on RISC-V when relaxation is
disabled (not verified). This commit can also fix for it.
---
Full diff: https://github.com/llvm/llvm-project/pull/155964.diff
3 Files Affected:
- (modified) lld/test/ELF/loongarch-call36.s (+3-3)
- (modified) llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp (+80)
- (modified) llvm/tools/sancov/sancov.cpp (+6-1)
``````````diff
diff --git a/lld/test/ELF/loongarch-call36.s b/lld/test/ELF/loongarch-call36.s
index b593fdf1f6045..5cc0f2f3827c3 100644
--- a/lld/test/ELF/loongarch-call36.s
+++ b/lld/test/ELF/loongarch-call36.s
@@ -8,14 +8,14 @@
## hi20 = target - pc + (1 << 17) >> 18 = 0x60020 - 0x20010 + 0x20000 >> 18 = 1
## lo18 = target - pc & (1 << 18) - 1 = 0x60020 - 0x20010 & 0x3ffff = 16
# EXE1: 20010: pcaddu18i $t0, 1
-# EXE1-NEXT: 20014: jirl $zero, $t0, 16
+# EXE1-NEXT: 20014: jirl $zero, $t0, 16 <foo>
# RUN: ld.lld %t/a.o --section-start=.text=0x20010 --section-start=.sec.foo=0x40020 -o %t/exe2
# RUN: llvm-objdump --no-show-raw-insn -d %t/exe2 | FileCheck --match-full-lines %s --check-prefix=EXE2
## hi20 = target - pc + (1 << 17) >> 18 = 0x40020 - 0x20010 + 0x20000 >> 18 = 1
## lo18 = target - pc & (1 << 18) - 1 = 0x40020 - 0x20010 & 0x3ffff = -131056
# EXE2: 20010: pcaddu18i $t0, 1
-# EXE2-NEXT: 20014: jirl $zero, $t0, -131056
+# EXE2-NEXT: 20014: jirl $zero, $t0, -131056 <foo>
# RUN: ld.lld %t/a.o -shared -T %t/a.t -o %t/a.so
# RUN: llvm-readelf -x .got.plt %t/a.so | FileCheck --check-prefix=GOTPLT %s
@@ -34,7 +34,7 @@
## hi20 = foo at plt - pc + (1 << 17) >> 18 = 0x1234520 - 0x1274670 + 0x20000 >> 18 = -1
## lo18 = foo at plt - pc & (1 << 18) - 1 = 0x1234520 - 0x1274670 & 0x3ffff = -336
# SO-NEXT: pcaddu18i $t0, -1{{$}}
-# SO-NEXT: jirl $zero, $t0, -336{{$}}
+# SO-NEXT: jirl $zero, $t0, -336 <.plt+0x20>{{$}}
# GOTPLT: section '.got.plt':
# GOTPLT-NEXT: 0x01274730 00000000 00000000 00000000 00000000
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
index 35277ce094a7d..e5bd1c91edec9 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
@@ -26,6 +26,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Compiler.h"
+#include <bitset>
#define GET_INSTRINFO_MC_DESC
#define ENABLE_INSTR_PREDICATE_VERIFIER
@@ -95,10 +96,81 @@ createLoongArchAsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS,
namespace {
class LoongArchMCInstrAnalysis : public MCInstrAnalysis {
+ int64_t GPRState[31] = {};
+ std::bitset<31> GPRValidMask;
+
+ static bool isGPR(MCRegister Reg) {
+ return Reg >= LoongArch::R0 && Reg <= LoongArch::R31;
+ }
+
+ static unsigned getRegIndex(MCRegister Reg) {
+ assert(isGPR(Reg) && Reg != LoongArch::R0 && "Invalid GPR reg");
+ return Reg - LoongArch::R1;
+ }
+
+ void setGPRState(MCRegister Reg, std::optional<int64_t> Value) {
+ if (Reg == LoongArch::R0)
+ return;
+
+ auto Index = getRegIndex(Reg);
+
+ if (Value) {
+ GPRState[Index] = *Value;
+ GPRValidMask.set(Index);
+ } else {
+ GPRValidMask.reset(Index);
+ }
+ }
+
+ std::optional<int64_t> getGPRState(MCRegister Reg) const {
+ if (Reg == LoongArch::R0)
+ return 0;
+
+ auto Index = getRegIndex(Reg);
+
+ if (GPRValidMask.test(Index))
+ return GPRState[Index];
+ return std::nullopt;
+ }
+
public:
explicit LoongArchMCInstrAnalysis(const MCInstrInfo *Info)
: MCInstrAnalysis(Info) {}
+ void resetState() override { GPRValidMask.reset(); }
+
+ void updateState(const MCInst &Inst, uint64_t Addr) override {
+ // Terminators mark the end of a basic block which means the sequentially
+ // next instruction will be the first of another basic block and the current
+ // state will typically not be valid anymore. For calls, we assume all
+ // registers may be clobbered by the callee (TODO: should we take the
+ // calling convention into account?).
+ if (isTerminator(Inst) || isCall(Inst)) {
+ resetState();
+ return;
+ }
+
+ switch (Inst.getOpcode()) {
+ default: {
+ // Clear the state of all defined registers for instructions that we don't
+ // explicitly support.
+ auto NumDefs = Info->get(Inst.getOpcode()).getNumDefs();
+ for (unsigned I = 0; I < NumDefs; ++I) {
+ auto DefReg = Inst.getOperand(I).getReg();
+ if (isGPR(DefReg))
+ setGPRState(DefReg, std::nullopt);
+ }
+ break;
+ }
+ case LoongArch::PCADDU18I:
+ setGPRState(
+ Inst.getOperand(0).getReg(),
+ Addr + SignExtend64<38>(
+ static_cast<uint64_t>(Inst.getOperand(1).getImm()) << 18));
+ break;
+ }
+ }
+
bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const override {
unsigned NumOps = Inst.getNumOperands();
@@ -108,6 +180,14 @@ class LoongArchMCInstrAnalysis : public MCInstrAnalysis {
return true;
}
+ if (Inst.getOpcode() == LoongArch::JIRL) {
+ if (auto TargetRegState = getGPRState(Inst.getOperand(1).getReg())) {
+ Target = *TargetRegState + Inst.getOperand(2).getImm();
+ return true;
+ }
+ return false;
+ }
+
return false;
}
diff --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp
index aebb5effd0be7..38893cf974a10 100644
--- a/llvm/tools/sancov/sancov.cpp
+++ b/llvm/tools/sancov/sancov.cpp
@@ -730,7 +730,7 @@ static void getObjectCoveragePoints(const object::ObjectFile &O,
std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
failIfEmpty(MII, "no instruction info for target " + TripleName);
- std::unique_ptr<const MCInstrAnalysis> MIA(
+ std::unique_ptr<MCInstrAnalysis> MIA(
TheTarget->createMCInstrAnalysis(MII.get()));
failIfEmpty(MIA, "no instruction analysis info for target " + TripleName);
@@ -750,6 +750,9 @@ static void getObjectCoveragePoints(const object::ObjectFile &O,
failIfError(BytesStr);
ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*BytesStr);
+ if (MIA)
+ MIA->resetState();
+
for (uint64_t Index = 0, Size = 0; Index < Section.getSize();
Index += Size) {
MCInst Inst;
@@ -760,6 +763,7 @@ static void getObjectCoveragePoints(const object::ObjectFile &O,
Size = std::min<uint64_t>(
ThisBytes.size(),
DisAsm->suggestBytesToSkip(ThisBytes, ThisAddr));
+ MIA->resetState();
continue;
}
uint64_t Addr = Index + SectionAddr;
@@ -770,6 +774,7 @@ static void getObjectCoveragePoints(const object::ObjectFile &O,
MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target) &&
SanCovAddrs.find(Target) != SanCovAddrs.end())
Addrs->insert(CovPoint);
+ MIA->updateState(Inst, Addr);
}
}
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/155964
More information about the llvm-commits
mailing list