[llvm] [LoongArch] Merge base and offset for tls-le code sequence (PR #122999)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 10 01:04:44 PST 2025
https://github.com/zhaoqi5 updated https://github.com/llvm/llvm-project/pull/122999
>From 6ee3b7aec4cafa09ee60371c21669c7197a44cc2 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Mon, 30 Dec 2024 15:35:12 +0800
Subject: [PATCH 1/8] [LoongArch] Add relax relocations for tls_le code
sequence
This commit adds relax relocations for the tls_le code sequence.
Both handwritten assembly and source code generated by clang
are affected.
A scheduled tls_le code sequence can still be relaxed normally, so
we can add relax relocations during code emission according to
their relocs. The code sequences of other relaxable macros cannot
be scheduled when relaxation is enabled. Attaching relax
relocations for them will be implemented in a later commit.
---
.../MCTargetDesc/LoongArchMCCodeEmitter.cpp | 11 ++-
.../MC/LoongArch/Relocations/relax-tls-le.s | 70 +++++++++++++++++++
2 files changed, 80 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/MC/LoongArch/Relocations/relax-tls-le.s
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index 359bde12444291e..187869bfa241b1c 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -282,9 +282,11 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO,
break;
case LoongArchMCExpr::VK_LoongArch_TLS_LE_HI20_R:
FixupKind = LoongArch::fixup_loongarch_tls_le_hi20_r;
+ RelaxCandidate = true;
break;
case LoongArchMCExpr::VK_LoongArch_TLS_LE_LO12_R:
FixupKind = LoongArch::fixup_loongarch_tls_le_lo12_r;
+ RelaxCandidate = true;
break;
case LoongArchMCExpr::VK_LoongArch_PCREL20_S2:
FixupKind = LoongArch::fixup_loongarch_pcrel20_s2;
@@ -387,11 +389,18 @@ void LoongArchMCCodeEmitter::expandAddTPRel(const MCInst &MI,
"Expected %le_add_r relocation on TP-relative symbol");
// Emit the correct %le_add_r relocation for the symbol.
- // TODO: Emit R_LARCH_RELAX for %le_add_r where the relax feature is enabled.
Fixups.push_back(MCFixup::create(
0, Expr, MCFixupKind(LoongArch::fixup_loongarch_tls_le_add_r),
MI.getLoc()));
+ // Emit R_LARCH_RELAX for %le_add_r when the relax feature is enabled.
+ bool EnableRelax = STI.hasFeature(LoongArch::FeatureRelax);
+ if (EnableRelax) {
+ const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx);
+ Fixups.push_back(MCFixup::create(
+ 0, Dummy, MCFixupKind(LoongArch::fixup_loongarch_relax), MI.getLoc()));
+ }
+
// Emit a normal ADD instruction with the given operands.
unsigned ADD = MI.getOpcode() == LoongArch::PseudoAddTPRel_D
? LoongArch::ADD_D
diff --git a/llvm/test/MC/LoongArch/Relocations/relax-tls-le.s b/llvm/test/MC/LoongArch/Relocations/relax-tls-le.s
new file mode 100644
index 000000000000000..899f12f85654d4c
--- /dev/null
+++ b/llvm/test/MC/LoongArch/Relocations/relax-tls-le.s
@@ -0,0 +1,70 @@
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=+relax < %s \
+# RUN: | llvm-readobj -r - | FileCheck --check-prefix=LA32-RELAX-RELOC %s
+# RUN: llvm-mc --filetype=obj --triple=loongarch32 --mattr=-relax < %s \
+# RUN: | llvm-readobj -r - | FileCheck --check-prefix=LA32-NORELAX-RELOC %s
+# RUN: llvm-mc --triple=loongarch32 --mattr=+relax < %s --show-encoding \
+# RUN: | FileCheck --check-prefix=LA32-RELAX-FIXUP %s
+
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax --defsym=LA64=1 < %s \
+# RUN: | llvm-readobj -r - | FileCheck --check-prefix=LA64-RELAX-RELOC %s
+# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax --defsym=LA64=1 < %s \
+# RUN: | llvm-readobj -r - | FileCheck --check-prefix=LA64-NORELAX-RELOC %s
+# RUN: llvm-mc --triple=loongarch64 --mattr=+relax --defsym=LA64=1 < %s --show-encoding \
+# RUN: | FileCheck --check-prefix=LA64-RELAX-FIXUP %s
+
+.long foo
+
+.ifndef LA64
+
+lu12i.w $a0, %le_hi20_r(foo)
+# LA32-NORELAX-RELOC: R_LARCH_TLS_LE_HI20_R foo 0x0
+# LA32-NORELAX-RELOC-NOT: R_LARCH_RELAX - 0x0
+# LA32-RELAX-RELOC: R_LARCH_TLS_LE_HI20_R foo 0x0
+# LA32-RELAX-RELOC: R_LARCH_RELAX - 0x0
+# LA32-RELAX-FIXUP: fixup A - offset: 0, value: %le_hi20_r(foo), kind: FK_NONE
+# LA32-RELAX-FIXUP: fixup B - offset: 0, value: 0, kind: FK_NONE
+
+add.w $a0, $a0, $tp, %le_add_r(foo)
+# LA32-NORELAX-RELOC: R_LARCH_TLS_LE_ADD_R foo 0x0
+# LA32-NORELAX-RELOC-NOT: R_LARCH_RELAX - 0x0
+# LA32-RELAX-RELOC: R_LARCH_TLS_LE_ADD_R foo 0x0
+# LA32-RELAX-RELOC: R_LARCH_RELAX - 0x0
+# LA32-RELAX-FIXUP: fixup A - offset: 0, value: %le_add_r(foo), kind: FK_NONE
+# LA32-RELAX-FIXUP: fixup B - offset: 0, value: 0, kind: FK_NONE
+
+addi.w $a0, $a0, %le_lo12_r(foo)
+# LA32-NORELAX-RELOC: R_LARCH_TLS_LE_LO12_R foo 0x0
+# LA32-NORELAX-RELOC-NOT: R_LARCH_RELAX - 0x0
+# LA32-RELAX-RELOC: R_LARCH_TLS_LE_LO12_R foo 0x0
+# LA32-RELAX-RELOC: R_LARCH_RELAX - 0x0
+# LA32-RELAX-FIXUP: fixup A - offset: 0, value: %le_lo12_r(foo), kind: FK_NONE
+# LA32-RELAX-FIXUP: fixup B - offset: 0, value: 0, kind: FK_NONE
+
+.else
+
+lu12i.w $a0, %le_hi20_r(foo)
+# LA64-NORELAX-RELOC: R_LARCH_TLS_LE_HI20_R foo 0x0
+# LA64-NORELAX-RELOC-NOT: R_LARCH_RELAX - 0x0
+# LA64-RELAX-RELOC: R_LARCH_TLS_LE_HI20_R foo 0x0
+# LA64-RELAX-RELOC: R_LARCH_RELAX - 0x0
+# LA64-RELAX-FIXUP: fixup A - offset: 0, value: %le_hi20_r(foo), kind: FK_NONE
+# LA64-RELAX-FIXUP: fixup B - offset: 0, value: 0, kind: FK_NONE
+
+add.d $a0, $a0, $tp, %le_add_r(foo)
+# LA64-NORELAX-RELOC: R_LARCH_TLS_LE_ADD_R foo 0x0
+# LA64-NORELAX-RELOC-NOT: R_LARCH_RELAX - 0x0
+# LA64-RELAX-RELOC: R_LARCH_TLS_LE_ADD_R foo 0x0
+# LA64-RELAX-RELOC: R_LARCH_RELAX - 0x0
+# LA64-RELAX-FIXUP: fixup A - offset: 0, value: %le_add_r(foo), kind: FK_NONE
+# LA64-RELAX-FIXUP: fixup B - offset: 0, value: 0, kind: FK_NONE
+
+addi.d $a0, $a0, %le_lo12_r(foo)
+# LA64-NORELAX-RELOC: R_LARCH_TLS_LE_LO12_R foo 0x0
+# LA64-NORELAX-RELOC-NOT: R_LARCH_RELAX - 0x0
+# LA64-RELAX-RELOC: R_LARCH_TLS_LE_LO12_R foo 0x0
+# LA64-RELAX-RELOC: R_LARCH_RELAX - 0x0
+# LA64-RELAX-FIXUP: fixup A - offset: 0, value: %le_lo12_r(foo), kind: FK_NONE
+# LA64-RELAX-FIXUP: fixup B - offset: 0, value: 0, kind: FK_NONE
+
+.endif
+
>From 85be5541a23a859ad8e50bd75fb7ff35985c5988 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Tue, 24 Dec 2024 11:03:23 +0800
Subject: [PATCH 2/8] [LoongArch] Avoid scheduling relaxable code sequence and
attach relax relocs
If linker relaxation is enabled, relaxable code sequences expanded
from pseudos should avoid being separated by instruction scheduling.
This commit tags a scheduling boundary for them so they will not be
scheduled apart. (Except for `tls_le` and `call36/tail36`: `tls_le`
can be scheduled with no influence on relaxation, and
`call36/tail36` are expanded later, in the `LoongArchExpandPseudo`
pass.)
A new mask target-flag is added to attach relax relocs to the
relaxable code sequences. (No need to add it for `tls_le` and
`call36/tail36` for the reasons shown above.) Because of this,
getting the "direct" flags is necessary when using their target-flags.
In addition, a code sequence optimized by the `MergeBaseOffset`
pass may no longer be relaxable, so the relax "bitmask" flag should
be removed.
---
.../LoongArch/LoongArchExpandPseudoInsts.cpp | 34 ++++--
.../Target/LoongArch/LoongArchInstrInfo.cpp | 99 ++++++++++++++++-
.../lib/Target/LoongArch/LoongArchInstrInfo.h | 3 +
.../Target/LoongArch/LoongArchMCInstLower.cpp | 4 +-
.../LoongArch/LoongArchMergeBaseOffset.cpp | 30 +++++-
.../LoongArch/LoongArchTargetMachine.cpp | 1 +
.../MCTargetDesc/LoongArchBaseInfo.h | 22 ++++
.../MCTargetDesc/LoongArchMCCodeEmitter.cpp | 1 +
.../CodeGen/LoongArch/linker-relaxation.ll | 102 ++++++++++++++++++
.../test/CodeGen/LoongArch/mir-relax-flags.ll | 64 +++++++++++
.../CodeGen/LoongArch/mir-target-flags.ll | 31 +++++-
11 files changed, 370 insertions(+), 21 deletions(-)
create mode 100644 llvm/test/CodeGen/LoongArch/linker-relaxation.ll
create mode 100644 llvm/test/CodeGen/LoongArch/mir-relax-flags.ll
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
index 0218934ea3344a7..be60de3d63d061c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
@@ -187,18 +187,23 @@ bool LoongArchPreRAExpandPseudo::expandPcalau12iInstPair(
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
+ const auto &STI = MF->getSubtarget<LoongArchSubtarget>();
+ bool EnableRelax = STI.hasFeature(LoongArch::FeatureRelax);
+
Register DestReg = MI.getOperand(0).getReg();
Register ScratchReg =
MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
MachineOperand &Symbol = MI.getOperand(1);
BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), ScratchReg)
- .addDisp(Symbol, 0, FlagsHi);
+ .addDisp(Symbol, 0,
+ EnableRelax ? LoongArchII::addRelaxFlag(FlagsHi) : FlagsHi);
MachineInstr *SecondMI =
BuildMI(MBB, MBBI, DL, TII->get(SecondOpcode), DestReg)
.addReg(ScratchReg)
- .addDisp(Symbol, 0, FlagsLo);
+ .addDisp(Symbol, 0,
+ EnableRelax ? LoongArchII::addRelaxFlag(FlagsLo) : FlagsLo);
if (MI.hasOneMemOperand())
SecondMI->addMemOperand(*MF, *MI.memoperands_begin());
@@ -481,6 +486,7 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSDesc(
unsigned ADD = STI.is64Bit() ? LoongArch::ADD_D : LoongArch::ADD_W;
unsigned ADDI = STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
unsigned LD = STI.is64Bit() ? LoongArch::LD_D : LoongArch::LD_W;
+ bool EnableRelax = STI.hasFeature(LoongArch::FeatureRelax);
Register DestReg = MI.getOperand(0).getReg();
Register Tmp1Reg =
@@ -488,7 +494,10 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSDesc(
MachineOperand &Symbol = MI.getOperand(Large ? 2 : 1);
BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), Tmp1Reg)
- .addDisp(Symbol, 0, LoongArchII::MO_DESC_PC_HI);
+ .addDisp(Symbol, 0,
+ (EnableRelax && !Large)
+ ? LoongArchII::addRelaxFlag(LoongArchII::MO_DESC_PC_HI)
+ : LoongArchII::MO_DESC_PC_HI);
if (Large) {
// Code Sequence:
@@ -526,19 +535,28 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSDesc(
// pcalau12i $a0, %desc_pc_hi20(sym)
// addi.w/d $a0, $a0, %desc_pc_lo12(sym)
// ld.w/d $ra, $a0, %desc_ld(sym)
- // jirl $ra, $ra, %desc_ld(sym)
- // add.d $dst, $a0, $tp
+ // jirl $ra, $ra, %desc_call(sym)
+ // add.w/d $dst, $a0, $tp
BuildMI(MBB, MBBI, DL, TII->get(ADDI), LoongArch::R4)
.addReg(Tmp1Reg)
- .addDisp(Symbol, 0, LoongArchII::MO_DESC_PC_LO);
+ .addDisp(Symbol, 0,
+ EnableRelax
+ ? LoongArchII::addRelaxFlag(LoongArchII::MO_DESC_PC_LO)
+ : LoongArchII::MO_DESC_PC_LO);
}
BuildMI(MBB, MBBI, DL, TII->get(LD), LoongArch::R1)
.addReg(LoongArch::R4)
- .addDisp(Symbol, 0, LoongArchII::MO_DESC_LD);
+ .addDisp(Symbol, 0,
+ (EnableRelax && !Large)
+ ? LoongArchII::addRelaxFlag(LoongArchII::MO_DESC_LD)
+ : LoongArchII::MO_DESC_LD);
BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PseudoDESC_CALL), LoongArch::R1)
.addReg(LoongArch::R1)
- .addDisp(Symbol, 0, LoongArchII::MO_DESC_CALL);
+ .addDisp(Symbol, 0,
+ (EnableRelax && !Large)
+ ? LoongArchII::addRelaxFlag(LoongArchII::MO_DESC_CALL)
+ : LoongArchII::MO_DESC_CALL);
BuildMI(MBB, MBBI, DL, TII->get(ADD), DestReg)
.addReg(LoongArch::R4)
.addReg(LoongArch::R2);
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index 7d0e4f9d58a16d0..13c8a5a39b6f4a6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -443,6 +443,89 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
break;
}
+ const auto &STI = MF.getSubtarget<LoongArchSubtarget>();
+ if (STI.hasFeature(LoongArch::FeatureRelax)) {
+ // When linker relaxation enabled, the following instruction patterns are
+ // prohibited from being reordered:
+ //
+ // * pcalau12i $a0, %pc_hi20(s)
+ // addi.w/d $a0, $a0, %pc_lo12(s)
+ //
+ // * pcalau12i $a0, %got_pc_hi20(s)
+ // ld.w/d $a0, $a0, %got_pc_lo12(s)
+ //
+ // * pcalau12i $a0, %ie_pc_hi20(s)
+ // ld.w/d $a0, $a0, %ie_pc_lo12(s)
+ //
+ // * pcalau12i $a0, %ld_pc_hi20(s) | %gd_pc_hi20(s)
+ // addi.w/d $a0, $a0, %got_pc_lo12(s)
+ //
+ // * pcalau12i $a0, %desc_pc_hi20(s)
+ // addi.w/d $a0, $a0, %desc_pc_lo12(s)
+ // ld.w/d $ra, $a0, %desc_ld(s)
+ // jirl $ra, $ra, %desc_call(s)
+ unsigned AddiOp = STI.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
+ unsigned LdOp = STI.is64Bit() ? LoongArch::LD_D : LoongArch::LD_W;
+ switch (MI.getOpcode()) {
+ case LoongArch::PCALAU12I: {
+ auto MO0 = LoongArchII::getDirectFlags(MI.getOperand(1));
+ auto SecondOp = std::next(MII);
+ if (MO0 == LoongArchII::MO_DESC_PC_HI) {
+ if (SecondOp == MIE || SecondOp->getOpcode() != AddiOp)
+ break;
+ auto Ld = std::next(SecondOp);
+ if (Ld == MIE || Ld->getOpcode() != LdOp)
+ break;
+ auto MO1 = LoongArchII::getDirectFlags(SecondOp->getOperand(2));
+ auto MO2 = LoongArchII::getDirectFlags(Ld->getOperand(2));
+ if (MO1 == LoongArchII::MO_DESC_PC_LO && MO2 == LoongArchII::MO_DESC_LD)
+ return true;
+ break;
+ }
+ if (SecondOp == MIE ||
+ (SecondOp->getOpcode() != AddiOp && SecondOp->getOpcode() != LdOp))
+ break;
+ auto MO1 = LoongArchII::getDirectFlags(SecondOp->getOperand(2));
+ if (MO0 == LoongArchII::MO_PCREL_HI && SecondOp->getOpcode() == AddiOp &&
+ MO1 == LoongArchII::MO_PCREL_LO)
+ return true;
+ if (MO0 == LoongArchII::MO_GOT_PC_HI && SecondOp->getOpcode() == LdOp &&
+ MO1 == LoongArchII::MO_GOT_PC_LO)
+ return true;
+ if (MO0 == LoongArchII::MO_IE_PC_HI && SecondOp->getOpcode() == LdOp &&
+ MO1 == LoongArchII::MO_IE_PC_LO)
+ return true;
+ if ((MO0 == LoongArchII::MO_LD_PC_HI ||
+ MO0 == LoongArchII::MO_GD_PC_HI) &&
+ SecondOp->getOpcode() == AddiOp && MO1 == LoongArchII::MO_GOT_PC_LO)
+ return true;
+ break;
+ }
+ case LoongArch::ADDI_W:
+ case LoongArch::ADDI_D: {
+ auto MO = LoongArchII::getDirectFlags(MI.getOperand(2));
+ if (MO == LoongArchII::MO_PCREL_LO || MO == LoongArchII::MO_GOT_PC_LO)
+ return true;
+ break;
+ }
+ case LoongArch::LD_W:
+ case LoongArch::LD_D: {
+ auto MO = LoongArchII::getDirectFlags(MI.getOperand(2));
+ if (MO == LoongArchII::MO_GOT_PC_LO || MO == LoongArchII::MO_IE_PC_LO)
+ return true;
+ break;
+ }
+ case LoongArch::PseudoDESC_CALL: {
+ auto MO = LoongArchII::getDirectFlags(MI.getOperand(2));
+ if (MO == LoongArchII::MO_DESC_CALL)
+ return true;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
return false;
}
@@ -618,7 +701,8 @@ bool LoongArchInstrInfo::reverseBranchCondition(
std::pair<unsigned, unsigned>
LoongArchInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
- return std::make_pair(TF, 0u);
+ const unsigned Mask = LoongArchII::MO_DIRECT_FLAG_MASK;
+ return std::make_pair(TF & Mask, TF & ~Mask);
}
ArrayRef<std::pair<unsigned, const char *>>
@@ -644,20 +728,29 @@ LoongArchInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
{MO_IE_PC_LO, "loongarch-ie-pc-lo"},
{MO_IE_PC64_LO, "loongarch-ie-pc64-lo"},
{MO_IE_PC64_HI, "loongarch-ie-pc64-hi"},
+ {MO_LD_PC_HI, "loongarch-ld-pc-hi"},
+ {MO_GD_PC_HI, "loongarch-gd-pc-hi"},
+ {MO_CALL36, "loongarch-call36"},
{MO_DESC_PC_HI, "loongarch-desc-pc-hi"},
{MO_DESC_PC_LO, "loongarch-desc-pc-lo"},
{MO_DESC64_PC_LO, "loongarch-desc64-pc-lo"},
{MO_DESC64_PC_HI, "loongarch-desc64-pc-hi"},
{MO_DESC_LD, "loongarch-desc-ld"},
{MO_DESC_CALL, "loongarch-desc-call"},
- {MO_LD_PC_HI, "loongarch-ld-pc-hi"},
- {MO_GD_PC_HI, "loongarch-gd-pc-hi"},
{MO_LE_HI_R, "loongarch-le-hi-r"},
{MO_LE_ADD_R, "loongarch-le-add-r"},
{MO_LE_LO_R, "loongarch-le-lo-r"}};
return ArrayRef(TargetFlags);
}
+ArrayRef<std::pair<unsigned, const char *>>
+LoongArchInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
+ using namespace LoongArchII;
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_RELAX, "loongarch-relax"}};
+ return ArrayRef(TargetFlags);
+}
+
// Returns true if this is the sext.w pattern, addi.w rd, rs, 0.
bool LoongArch::isSEXT_W(const MachineInstr &MI) {
return MI.getOpcode() == LoongArch::ADDI_W && MI.getOperand(1).isReg() &&
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index ef9970783107ea5..a5b31878bfa1c29 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -91,6 +91,9 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo {
ArrayRef<std::pair<unsigned, const char *>>
getSerializableDirectMachineOperandTargetFlags() const override;
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableBitmaskMachineOperandTargetFlags() const override;
+
protected:
const LoongArchSubtarget &STI;
};
diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
index d1de0609f24ce28..d87ed068ebff8af 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
@@ -27,7 +27,7 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
MCContext &Ctx = AP.OutContext;
LoongArchMCExpr::VariantKind Kind;
- switch (MO.getTargetFlags()) {
+ switch (LoongArchII::getDirectFlags(MO)) {
default:
llvm_unreachable("Unknown target flag on GV operand");
case LoongArchII::MO_None:
@@ -134,7 +134,7 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
ME, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
if (Kind != LoongArchMCExpr::VK_LoongArch_None)
- ME = LoongArchMCExpr::create(ME, Kind, Ctx);
+ ME = LoongArchMCExpr::create(ME, Kind, Ctx, LoongArchII::hasRelaxFlag(MO));
return MCOperand::createExpr(ME);
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
index e9455fdd23ba548..7f98f7718a538d6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
@@ -105,7 +105,7 @@ bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
return false;
const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
- if (Hi20Op1.getTargetFlags() != LoongArchII::MO_PCREL_HI)
+ if (LoongArchII::getDirectFlags(Hi20Op1) != LoongArchII::MO_PCREL_HI)
return false;
auto isGlobalOrCPIOrBlockAddress = [](const MachineOperand &Op) {
@@ -157,7 +157,7 @@ bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
assert(Hi20.getOpcode() == LoongArch::PCALAU12I);
- if (Lo12Op2.getTargetFlags() != LoongArchII::MO_PCREL_LO ||
+ if (LoongArchII::getDirectFlags(Lo12Op2) != LoongArchII::MO_PCREL_LO ||
!(isGlobalOrCPIOrBlockAddress(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
Lo12Op2.getOffset() != 0)
return false;
@@ -597,9 +597,28 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
if (!isInt<32>(NewOffset))
return false;
+ // If optimized by this pass successfully, MO_RELAX bitmask target-flag should
+ // be removed from the code sequence.
+ //
+ // For example:
+ // pcalau12i $a0, %pc_hi20(symbol)
+ // addi.d $a0, $a0, %pc_lo12(symbol)
+ // ld.w $a0, $a0, 0
+ //
+ // =>
+ //
+ // pcalau12i $a0, %pc_hi20(symbol)
+ // ld.w $a0, $a0, %pc_lo12(symbol)
+ //
+ // The code sequence before optimization can be relaxed by the linker. But
+ // after being optimized, it cannot be relaxed any more. So the MO_RELAX flag
+ // should not be carried by it.
Hi20.getOperand(1).setOffset(NewOffset);
+ Hi20.getOperand(1).setTargetFlags(
+ LoongArchII::getDirectFlags(Hi20.getOperand(1)));
MachineOperand &ImmOp = Lo12.getOperand(2);
ImmOp.setOffset(NewOffset);
+ ImmOp.setTargetFlags(LoongArchII::getDirectFlags(ImmOp));
if (Lo20 && Hi12) {
Lo20->getOperand(2).setOffset(NewOffset);
Hi12->getOperand(2).setOffset(NewOffset);
@@ -617,15 +636,16 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
switch (ImmOp.getType()) {
case MachineOperand::MO_GlobalAddress:
MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(),
- ImmOp.getTargetFlags());
+ LoongArchII::getDirectFlags(ImmOp));
break;
case MachineOperand::MO_MCSymbol:
- MO.ChangeToMCSymbol(ImmOp.getMCSymbol(), ImmOp.getTargetFlags());
+ MO.ChangeToMCSymbol(ImmOp.getMCSymbol(),
+ LoongArchII::getDirectFlags(ImmOp));
MO.setOffset(ImmOp.getOffset());
break;
case MachineOperand::MO_BlockAddress:
MO.ChangeToBA(ImmOp.getBlockAddress(), ImmOp.getOffset(),
- ImmOp.getTargetFlags());
+ LoongArchII::getDirectFlags(ImmOp));
break;
default:
report_fatal_error("unsupported machine operand type");
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index b611365f608af97..62b08be5435cda8 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -38,6 +38,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTarget() {
initializeLoongArchMergeBaseOffsetOptPass(*PR);
initializeLoongArchOptWInstrsPass(*PR);
initializeLoongArchPreRAExpandPseudoPass(*PR);
+ initializeLoongArchExpandPseudoPass(*PR);
initializeLoongArchDAGToDAGISelLegacyPass(*PR);
}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
index 23699043b9926a1..371ae580419b21b 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
@@ -17,6 +17,7 @@
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/TargetParser/SubtargetFeature.h"
@@ -58,8 +59,29 @@ enum {
MO_LE_ADD_R,
MO_LE_LO_R,
// TODO: Add more flags.
+
+ // Used to differentiate between target-specific "direct" flags and "bitmask"
+ // flags. A machine operand can only have one "direct" flag, but can have
+ // multiple "bitmask" flags.
+ MO_DIRECT_FLAG_MASK = 0x3f,
+
+ MO_RELAX = 0x40
};
+// Given a MachineOperand that may carry "bitmask" flags, such as MO_RELAX,
+// return the LoongArch target-specific "direct" flags.
+static inline unsigned getDirectFlags(const MachineOperand &MO) {
+ return MO.getTargetFlags() & MO_DIRECT_FLAG_MASK;
+}
+
+// Add the MO_RELAX "bitmask" flag to the given flags. Callers should only do
+// this when FeatureRelax is enabled.
+static inline unsigned addRelaxFlag(unsigned Flags) { return Flags | MO_RELAX; }
+
+// \returns true if the given MachineOperand has MO_RELAX "bitmask" flag.
+static inline bool hasRelaxFlag(const MachineOperand &MO) {
+ return MO.getTargetFlags() & MO_RELAX;
+}
+
// Target-specific flags of LAInst.
// All definitions must match LoongArchInstrFormats.td.
enum {
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index 187869bfa241b1c..71f044dadf8be54 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -249,6 +249,7 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO,
break;
case LoongArchMCExpr::VK_LoongArch_CALL36:
FixupKind = LoongArch::fixup_loongarch_call36;
+ RelaxCandidate = true;
break;
case LoongArchMCExpr::VK_LoongArch_TLS_DESC_PC_HI20:
FixupKind = LoongArch::fixup_loongarch_tls_desc_pc_hi20;
diff --git a/llvm/test/CodeGen/LoongArch/linker-relaxation.ll b/llvm/test/CodeGen/LoongArch/linker-relaxation.ll
new file mode 100644
index 000000000000000..2827a95547903bf
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/linker-relaxation.ll
@@ -0,0 +1,102 @@
+; RUN: llc --mtriple=loongarch64 --filetype=obj -mattr=-relax \
+; RUN: --relocation-model=pic --code-model=medium < %s \
+; RUN: | llvm-readobj -r - | FileCheck --check-prefixes=CHECK-RELOC,PCALA-RELOC %s
+; RUN: llc --mtriple=loongarch64 --filetype=obj -mattr=+relax \
+; RUN: --relocation-model=pic --code-model=medium < %s \
+; RUN: | llvm-readobj -r - | FileCheck --check-prefixes=CHECK-RELOC,RELAX %s
+
+; RUN: llc --mtriple=loongarch64 --filetype=obj -mattr=-relax --enable-tlsdesc \
+; RUN: --relocation-model=pic --code-model=medium < %s \
+; RUN: | llvm-readobj -r - | FileCheck --check-prefix=DESC-RELOC %s
+; RUN: llc --mtriple=loongarch64 --filetype=obj -mattr=+relax --enable-tlsdesc \
+; RUN: --relocation-model=pic --code-model=medium < %s \
+; RUN: | llvm-readobj -r - | FileCheck --check-prefixes=DESC-RELOC,DESC-RELAX %s
+
+;; Check relocations when linker relaxation is disabled or enabled.
+;; These tests are also able to check that the relax mask flags are
+;; removed after the loongarch-merge-base-offset pass, because no relax
+;; relocs are emitted after being optimized by it.
+
+ at g_e = external global i32
+ at g_i = internal global i32 0
+ at g_i1 = internal global i32 1
+ at t_un = external thread_local global i32
+ at t_ld = external thread_local(localdynamic) global i32
+ at t_ie = external thread_local(initialexec) global i32
+ at t_le = external thread_local(localexec) global i32
+
+declare void @callee1() nounwind
+declare dso_local void @callee2() nounwind
+declare dso_local void @callee3() nounwind
+
+define ptr @caller() nounwind {
+; RELAX: R_LARCH_ALIGN - 0x1C
+; CHECK-RELOC: R_LARCH_GOT_PC_HI20 g_e 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; CHECK-RELOC-NEXT: R_LARCH_GOT_PC_LO12 g_e 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; PCALA-RELOC: R_LARCH_PCALA_HI20 .bss 0x0
+; RELAX-NEXT: R_LARCH_PCALA_HI20 g_i 0x0
+; PCALA-RELOC: R_LARCH_PCALA_LO12 .bss 0x0
+; RELAX-NEXT: R_LARCH_PCALA_LO12 g_i 0x0
+; CHECK-RELOC: R_LARCH_TLS_GD_PC_HI20 t_un 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; CHECK-RELOC-NEXT: R_LARCH_GOT_PC_LO12 t_un 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; CHECK-RELOC-NEXT: R_LARCH_CALL36 __tls_get_addr 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; DESC-RELOC: R_LARCH_TLS_DESC_PC_HI20 t_un 0x0
+; DESC-RELAX: R_LARCH_RELAX - 0x0
+; DESC-RELOC-NEXT: R_LARCH_TLS_DESC_PC_LO12 t_un 0x0
+; DESC-RELAX-NEXT: R_LARCH_RELAX - 0x0
+; DESC-RELOC-NEXT: R_LARCH_TLS_DESC_LD t_un 0x0
+; DESC-RELAX-NEXT: R_LARCH_RELAX - 0x0
+; DESC-RELOC-NEXT: R_LARCH_TLS_DESC_CALL t_un 0x0
+; DESC-RELAX-NEXT: R_LARCH_RELAX - 0x0
+; CHECK-RELOC-NEXT: R_LARCH_TLS_LD_PC_HI20 t_ld 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; CHECK-RELOC-NEXT: R_LARCH_GOT_PC_LO12 t_ld 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; CHECK-RELOC-NEXT: R_LARCH_CALL36 __tls_get_addr 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; DESC-RELOC-NEXT: R_LARCH_TLS_DESC_PC_HI20 t_ld 0x0
+; DESC-RELAX-NEXT: R_LARCH_RELAX - 0x0
+; DESC-RELOC-NEXT: R_LARCH_TLS_DESC_PC_LO12 t_ld 0x0
+; DESC-RELAX-NEXT: R_LARCH_RELAX - 0x0
+; DESC-RELOC-NEXT: R_LARCH_TLS_DESC_LD t_ld 0x0
+; DESC-RELAX-NEXT: R_LARCH_RELAX - 0x0
+; DESC-RELOC-NEXT: R_LARCH_TLS_DESC_CALL t_ld 0x0
+; DESC-RELAX-NEXT: R_LARCH_RELAX - 0x0
+; CHECK-RELOC-NEXT: R_LARCH_TLS_IE_PC_HI20 t_ie 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; CHECK-RELOC-NEXT: R_LARCH_TLS_IE_PC_LO12 t_ie 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; CHECK-RELOC-NEXT: R_LARCH_TLS_LE_HI20_R t_le 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; CHECK-RELOC-NEXT: R_LARCH_TLS_LE_ADD_R t_le 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; CHECK-RELOC-NEXT: R_LARCH_TLS_LE_LO12_R t_le 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; CHECK-RELOC-NEXT: R_LARCH_CALL36 callee1 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; CHECK-RELOC-NEXT: R_LARCH_CALL36 callee2 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; CHECK-RELOC-NEXT: R_LARCH_CALL36 callee3 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; PCALA-RELOC: R_LARCH_PCALA_HI20 .data 0x0
+; RELAX-NEXT: R_LARCH_PCALA_HI20 g_i1 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+; PCALA-RELOC: R_LARCH_PCALA_LO12 .data 0x0
+; RELAX-NEXT: R_LARCH_PCALA_LO12 g_i1 0x0
+; RELAX-NEXT: R_LARCH_RELAX - 0x0
+ %a = load volatile i32, ptr @g_e
+ %b = load volatile i32, ptr @g_i
+ %c = load volatile i32, ptr @t_un
+ %d = load volatile i32, ptr @t_ld
+ %e = load volatile i32, ptr @t_ie
+ %f = load volatile i32, ptr @t_le
+ call i32 @callee1()
+ call i32 @callee2()
+ tail call i32 @callee3()
+ ret ptr @g_i1
+}
diff --git a/llvm/test/CodeGen/LoongArch/mir-relax-flags.ll b/llvm/test/CodeGen/LoongArch/mir-relax-flags.ll
new file mode 100644
index 000000000000000..b894de50eed29ab
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/mir-relax-flags.ll
@@ -0,0 +1,64 @@
+; RUN: llc --mtriple=loongarch64 -mattr=+d,+relax --stop-after loongarch-prera-expand-pseudo \
+; RUN: --relocation-model=pic --code-model=medium %s -o %t.mir
+; RUN: llc --mtriple=loongarch64 -mattr=+d,+relax --run-pass loongarch-prera-expand-pseudo \
+; RUN: --code-model=medium %t.mir -o - | FileCheck %s --check-prefixes=CHECK,MEDCALL
+; RUN: llc --mtriple=loongarch64 -mattr=+d,+relax --run-pass loongarch-expand-pseudo \
+; RUN: --code-model=medium %t.mir -o - | FileCheck %s --check-prefixes=CHECK,CALL36
+
+; RUN: llc --mtriple=loongarch64 -mattr=+d,+relax --stop-after loongarch-prera-expand-pseudo \
+; RUN: --relocation-model=pic --enable-tlsdesc --code-model=medium %s -o %t.desc.mir
+; RUN: llc --mtriple=loongarch64 -mattr=+d,+relax --run-pass loongarch-prera-expand-pseudo \
+; RUN: --code-model=medium %t.desc.mir -o - | FileCheck %s --check-prefix=DESC
+
+;; Check target-flags after expand-pseudo pass.
+
+ at g_e = external global i32
+ at g_i = internal global i32 0
+ at t_un = external thread_local global i32
+ at t_ld = external thread_local(localdynamic) global i32
+ at t_ie = external thread_local(initialexec) global i32
+ at t_le = external thread_local(localexec) global i32
+
+declare void @callee1() nounwind
+declare dso_local void @callee2() nounwind
+declare dso_local void @callee3() nounwind
+
+define void @caller() nounwind {
+; CHECK: target-flags(loongarch-got-pc-hi, loongarch-relax) @g_e
+; CHECK-NEXT: target-flags(loongarch-got-pc-lo, loongarch-relax) @g_e
+; CHECK: target-flags(loongarch-pcrel-hi, loongarch-relax) @g_i
+; CHECK-NEXT: target-flags(loongarch-pcrel-lo, loongarch-relax) @g_i
+; CHECK: target-flags(loongarch-gd-pc-hi, loongarch-relax) @t_un
+; CHECK-NEXT: target-flags(loongarch-got-pc-lo, loongarch-relax) @t_un
+; DESC: target-flags(loongarch-desc-pc-hi, loongarch-relax) @t_un
+; DESC-NEXT: target-flags(loongarch-desc-pc-lo, loongarch-relax) @t_un
+; DESC-NEXT: target-flags(loongarch-desc-ld, loongarch-relax) @t_un
+; DESC-NEXT: target-flags(loongarch-desc-call, loongarch-relax) @t_un
+; CHECK: target-flags(loongarch-ld-pc-hi, loongarch-relax) @t_ld
+; CHECK-NEXT: target-flags(loongarch-got-pc-lo, loongarch-relax) @t_ld
+; DESC: target-flags(loongarch-desc-pc-hi, loongarch-relax) @t_ld
+; DESC-NEXT: target-flags(loongarch-desc-pc-lo, loongarch-relax) @t_ld
+; DESC-NEXT: target-flags(loongarch-desc-ld, loongarch-relax) @t_ld
+; DESC-NEXT: target-flags(loongarch-desc-call, loongarch-relax) @t_ld
+; CHECK: target-flags(loongarch-ie-pc-hi, loongarch-relax) @t_ie
+; CHECK-NEXT: target-flags(loongarch-ie-pc-lo, loongarch-relax) @t_ie
+; CHECK: target-flags(loongarch-le-hi-r) @t_le
+; CHECK-NEXT: target-flags(loongarch-le-add-r) @t_le
+; CHECK-NEXT: target-flags(loongarch-le-lo-r) @t_le
+; MEDCALL: target-flags(loongarch-call-plt) @callee1
+; CALL36: target-flags(loongarch-call36) @callee1
+; MEDCALL: target-flags(loongarch-call) @callee2
+; CALL36: target-flags(loongarch-call36) @callee2
+; MEDCALL: target-flags(loongarch-call) @callee3
+; CALL36: target-flags(loongarch-call36) @callee3
+ %a = load volatile i32, ptr @g_e
+ %b = load volatile i32, ptr @g_i
+ %c = load volatile i32, ptr @t_un
+ %d = load volatile i32, ptr @t_ld
+ %e = load volatile i32, ptr @t_ie
+ %f = load volatile i32, ptr @t_le
+ call i32 @callee1()
+ call i32 @callee2()
+ tail call i32 @callee3()
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/mir-target-flags.ll b/llvm/test/CodeGen/LoongArch/mir-target-flags.ll
index 3bc8a8d30958686..51c2a19da80e3c2 100644
--- a/llvm/test/CodeGen/LoongArch/mir-target-flags.ll
+++ b/llvm/test/CodeGen/LoongArch/mir-target-flags.ll
@@ -1,7 +1,18 @@
-; RUN: llc --mtriple=loongarch64 -mattr=+d --stop-after loongarch-prera-expand-pseudo \
-; RUN: --relocation-model=pic %s -o %t.mir
-; RUN: llc --mtriple=loongarch64 -mattr=+d --run-pass loongarch-prera-expand-pseudo \
+; RUN: llc --mtriple=loongarch64 -mattr=+d,-relax --stop-after loongarch-prera-expand-pseudo \
+; RUN: --relocation-model=pic --code-model=small %s -o %t.mir
+; RUN: llc --mtriple=loongarch64 -mattr=+d,-relax --run-pass loongarch-prera-expand-pseudo \
; RUN: %t.mir -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch64 -mattr=+d,-relax --stop-after loongarch-prera-expand-pseudo \
+; RUN: --relocation-model=pic --enable-tlsdesc --code-model=small %s -o %t.desc.mir
+; RUN: llc --mtriple=loongarch64 -mattr=+d,-relax --run-pass loongarch-prera-expand-pseudo \
+; RUN: %t.desc.mir -o - | FileCheck %s --check-prefix=DESC
+
+; RUN: llc --mtriple=loongarch64 -mattr=+d,-relax --stop-after loongarch-prera-expand-pseudo \
+; RUN: --relocation-model=pic --code-model=medium %s -o %t.med.mir
+; RUN: llc --mtriple=loongarch64 -mattr=+d,-relax --run-pass loongarch-prera-expand-pseudo \
+; RUN: --code-model=medium %t.med.mir -o - | FileCheck %s
+; RUN: llc --mtriple=loongarch64 -mattr=+d,-relax --run-pass loongarch-expand-pseudo \
+; RUN: --code-model=medium %t.med.mir -o - | FileCheck %s --check-prefixes=CALL36
;; This tests the LoongArch-specific serialization and deserialization of
;; `target-flags(...)`
@@ -15,6 +26,7 @@
declare void @callee1() nounwind
declare dso_local void @callee2() nounwind
+declare dso_local void @callee3() nounwind
define void @caller() nounwind {
; CHECK-LABEL: name: caller
@@ -24,15 +36,27 @@ define void @caller() nounwind {
; CHECK-NEXT: target-flags(loongarch-pcrel-lo) @g_i
; CHECK: target-flags(loongarch-gd-pc-hi) @t_un
; CHECK-NEXT: target-flags(loongarch-got-pc-lo) @t_un
+; DESC: target-flags(loongarch-desc-pc-hi) @t_un
+; DESC-NEXT: target-flags(loongarch-desc-pc-lo) @t_un
+; DESC-NEXT: target-flags(loongarch-desc-ld) @t_un
+; DESC-NEXT: target-flags(loongarch-desc-call) @t_un
; CHECK: target-flags(loongarch-ld-pc-hi) @t_ld
; CHECK-NEXT: target-flags(loongarch-got-pc-lo) @t_ld
+; DESC: target-flags(loongarch-desc-pc-hi) @t_ld
+; DESC-NEXT: target-flags(loongarch-desc-pc-lo) @t_ld
+; DESC-NEXT: target-flags(loongarch-desc-ld) @t_ld
+; DESC-NEXT: target-flags(loongarch-desc-call) @t_ld
; CHECK: target-flags(loongarch-ie-pc-hi) @t_ie
; CHECK-NEXT: target-flags(loongarch-ie-pc-lo) @t_ie
; CHECK: target-flags(loongarch-le-hi-r) @t_le
; CHECK-NEXT: target-flags(loongarch-le-add-r) @t_le
; CHECK-NEXT: target-flags(loongarch-le-lo-r) @t_le
; CHECK: target-flags(loongarch-call-plt) @callee1
+; CALL36: target-flags(loongarch-call36) @callee1
; CHECK: target-flags(loongarch-call) @callee2
+; CALL36: target-flags(loongarch-call36) @callee2
+; CHECK: target-flags(loongarch-call) @callee3
+; CALL36: target-flags(loongarch-call36) @callee3
%a = load volatile i32, ptr @g_e
%b = load volatile i32, ptr @g_i
%c = load volatile i32, ptr @t_un
@@ -41,5 +65,6 @@ define void @caller() nounwind {
%f = load volatile i32, ptr @t_le
call i32 @callee1()
call i32 @callee2()
+ tail call i32 @callee3()
ret void
}
>From 9a607952090934a58f90fd0081d941c411340811 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Wed, 8 Jan 2025 16:39:40 +0800
Subject: [PATCH 3/8] address comments
---
.../LoongArch/LoongArchExpandPseudoInsts.cpp | 28 ++++++++-----------
.../Target/LoongArch/LoongArchInstrInfo.cpp | 8 +-----
.../MCTargetDesc/LoongArchBaseInfo.h | 4 ++-
3 files changed, 15 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
index be60de3d63d061c..c2d73a260b1c1b4 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
@@ -196,14 +196,12 @@ bool LoongArchPreRAExpandPseudo::expandPcalau12iInstPair(
MachineOperand &Symbol = MI.getOperand(1);
BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), ScratchReg)
- .addDisp(Symbol, 0,
- EnableRelax ? LoongArchII::addRelaxFlag(FlagsHi) : FlagsHi);
+ .addDisp(Symbol, 0, LoongArchII::encodeFlags(FlagsHi, EnableRelax));
MachineInstr *SecondMI =
BuildMI(MBB, MBBI, DL, TII->get(SecondOpcode), DestReg)
.addReg(ScratchReg)
- .addDisp(Symbol, 0,
- EnableRelax ? LoongArchII::addRelaxFlag(FlagsLo) : FlagsLo);
+ .addDisp(Symbol, 0, LoongArchII::encodeFlags(FlagsLo, EnableRelax));
if (MI.hasOneMemOperand())
SecondMI->addMemOperand(*MF, *MI.memoperands_begin());
@@ -495,9 +493,8 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSDesc(
BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), Tmp1Reg)
.addDisp(Symbol, 0,
- (EnableRelax && !Large)
- ? LoongArchII::addRelaxFlag(LoongArchII::MO_DESC_PC_HI)
- : LoongArchII::MO_DESC_PC_HI);
+ LoongArchII::encodeFlags(LoongArchII::MO_DESC_PC_HI,
+ EnableRelax && !Large));
if (Large) {
// Code Sequence:
@@ -539,24 +536,21 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSDesc(
// add.w/d $dst, $a0, $tp
BuildMI(MBB, MBBI, DL, TII->get(ADDI), LoongArch::R4)
.addReg(Tmp1Reg)
- .addDisp(Symbol, 0,
- EnableRelax
- ? LoongArchII::addRelaxFlag(LoongArchII::MO_DESC_PC_LO)
- : LoongArchII::MO_DESC_PC_LO);
+ .addDisp(
+ Symbol, 0,
+ LoongArchII::encodeFlags(LoongArchII::MO_DESC_PC_LO, EnableRelax));
}
BuildMI(MBB, MBBI, DL, TII->get(LD), LoongArch::R1)
.addReg(LoongArch::R4)
.addDisp(Symbol, 0,
- (EnableRelax && !Large)
- ? LoongArchII::addRelaxFlag(LoongArchII::MO_DESC_LD)
- : LoongArchII::MO_DESC_LD);
+ LoongArchII::encodeFlags(LoongArchII::MO_DESC_LD,
+ EnableRelax && !Large));
BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PseudoDESC_CALL), LoongArch::R1)
.addReg(LoongArch::R1)
.addDisp(Symbol, 0,
- (EnableRelax && !Large)
- ? LoongArchII::addRelaxFlag(LoongArchII::MO_DESC_CALL)
- : LoongArchII::MO_DESC_CALL);
+ LoongArchII::encodeFlags(LoongArchII::MO_DESC_CALL,
+ EnableRelax && !Large));
BuildMI(MBB, MBBI, DL, TII->get(ADD), DestReg)
.addReg(LoongArch::R4)
.addReg(LoongArch::R2);
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index 13c8a5a39b6f4a6..d880fe3e1cdbf3a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -454,9 +454,6 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
// * pcalau12i $a0, %got_pc_hi20(s)
// ld.w/d $a0, $a0, %got_pc_lo12(s)
//
- // * pcalau12i $a0, %ie_pc_hi20(s)
- // ld.w/d $a0, $a0, %ie_pc_lo12(s)
- //
// * pcalau12i $a0, %ld_pc_hi20(s) | %gd_pc_hi20(s)
// addi.w/d $a0, $a0, %got_pc_lo12(s)
//
@@ -492,9 +489,6 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
if (MO0 == LoongArchII::MO_GOT_PC_HI && SecondOp->getOpcode() == LdOp &&
MO1 == LoongArchII::MO_GOT_PC_LO)
return true;
- if (MO0 == LoongArchII::MO_IE_PC_HI && SecondOp->getOpcode() == LdOp &&
- MO1 == LoongArchII::MO_IE_PC_LO)
- return true;
if ((MO0 == LoongArchII::MO_LD_PC_HI ||
MO0 == LoongArchII::MO_GD_PC_HI) &&
SecondOp->getOpcode() == AddiOp && MO1 == LoongArchII::MO_GOT_PC_LO)
@@ -511,7 +505,7 @@ bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
case LoongArch::LD_W:
case LoongArch::LD_D: {
auto MO = LoongArchII::getDirectFlags(MI.getOperand(2));
- if (MO == LoongArchII::MO_GOT_PC_LO || MO == LoongArchII::MO_IE_PC_LO)
+ if (MO == LoongArchII::MO_GOT_PC_LO)
return true;
break;
}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
index 371ae580419b21b..833cd062616244d 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
@@ -75,7 +75,9 @@ static inline unsigned getDirectFlags(const MachineOperand &MO) {
}
// Add MO_RELAX "bitmask" flag when FeatureRelax is enabled.
-static inline unsigned addRelaxFlag(unsigned Flags) { return Flags | MO_RELAX; }
+static inline unsigned encodeFlags(unsigned Flags, bool Relax) {
+ return Flags | (Relax ? MO_RELAX : 0);
+}
// \returns true if the given MachineOperand has MO_RELAX "bitmask" flag.
static inline bool hasRelaxFlag(const MachineOperand &MO) {
>From 59d7b50c713dc28580f6de14816a2a2debe86414 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Tue, 14 Jan 2025 21:32:45 +0800
Subject: [PATCH 4/8] [LoongArch] Pre-commit tests for tls-le merge base
offset. NFC
Similar to tests in `merge-base-offset.ll`, except for tests
of blockaddress.
A later commit will optimize this.
---
.../LoongArch/merge-base-offset-tlsle.ll | 971 ++++++++++++++++++
.../CodeGen/LoongArch/merge-base-offset.ll | 44 +-
2 files changed, 993 insertions(+), 22 deletions(-)
create mode 100644 llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
new file mode 100644
index 000000000000000..7e995d224ce1d22
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
@@ -0,0 +1,971 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+lasx --verify-machineinstrs < %s \
+; RUN: | FileCheck --check-prefix=LA32 %s
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx --verify-machineinstrs < %s \
+; RUN: | FileCheck --check-prefix=LA64 %s
+
+ at g_i8 = dso_local thread_local(localexec) global i8 0
+
+define dso_local signext i8 @tlsle_load_s8() nounwind {
+; LA32-LABEL: tlsle_load_s8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
+; LA32-NEXT: ld.b $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_s8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
+; LA64-NEXT: ld.b $a0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
+ %1 = load i8, ptr %0
+ ret i8 %1
+}
+
+define dso_local zeroext i8 @tlsle_load_u8() nounwind {
+; LA32-LABEL: tlsle_load_u8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
+; LA32-NEXT: ld.bu $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_u8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
+; LA64-NEXT: ld.bu $a0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
+ %1 = load i8, ptr %0
+ ret i8 %1
+}
+
+define dso_local void @tlsle_store_i8() nounwind {
+; LA32-LABEL: tlsle_store_i8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
+; LA32-NEXT: ori $a1, $zero, 1
+; LA32-NEXT: st.b $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_i8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
+; LA64-NEXT: ori $a1, $zero, 1
+; LA64-NEXT: st.b $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
+ store i8 1, ptr %0
+ ret void
+}
+
+ at g_i16 = dso_local thread_local(localexec) global i16 0
+
+define dso_local signext i16 @tlsle_load_s16() nounwind {
+; LA32-LABEL: tlsle_load_s16:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i16)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i16)
+; LA32-NEXT: ld.h $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_s16:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i16)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i16)
+; LA64-NEXT: ld.h $a0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
+ %1 = load i16, ptr %0
+ ret i16 %1
+}
+
+define dso_local zeroext i16 @tlsle_load_u16() nounwind {
+; LA32-LABEL: tlsle_load_u16:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i16)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i16)
+; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_u16:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i16)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i16)
+; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
+ %1 = load i16, ptr %0
+ ret i16 %1
+}
+
+define dso_local void @tlsle_store_i16() nounwind {
+; LA32-LABEL: tlsle_store_i16:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i16)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i16)
+; LA32-NEXT: ori $a1, $zero, 1
+; LA32-NEXT: st.h $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_i16:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i16)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i16)
+; LA64-NEXT: ori $a1, $zero, 1
+; LA64-NEXT: st.h $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
+ store i16 1, ptr %0
+ ret void
+}
+
+ at g_i32 = dso_local thread_local(localexec) global i32 0
+
+define dso_local signext i32 @tlsle_load_s32() nounwind {
+; LA32-LABEL: tlsle_load_s32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32)
+; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_s32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32)
+; LA64-NEXT: ld.w $a0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
+ %1 = load i32, ptr %0
+ ret i32 %1
+}
+
+define dso_local zeroext i32 @tlsle_load_u32() nounwind {
+; LA32-LABEL: tlsle_load_u32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32)
+; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_u32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32)
+; LA64-NEXT: ld.wu $a0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
+ %1 = load i32, ptr %0
+ ret i32 %1
+}
+
+define dso_local void @tlsle_store_i32() nounwind {
+; LA32-LABEL: tlsle_store_i32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32)
+; LA32-NEXT: ori $a1, $zero, 1
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_i32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32)
+; LA64-NEXT: ori $a1, $zero, 1
+; LA64-NEXT: st.w $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
+ store i32 1, ptr %0
+ ret void
+}
+
+ at g_i64 = dso_local thread_local(localexec) global i64 0
+
+define dso_local i64 @tlsle_load_i64() nounwind {
+; LA32-LABEL: tlsle_load_i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i64)
+; LA32-NEXT: addi.w $a1, $a0, %le_lo12_r(g_i64)
+; LA32-NEXT: ld.w $a0, $a1, 0
+; LA32-NEXT: ld.w $a1, $a1, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i64)
+; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i64)
+ %1 = load i64, ptr %0
+ ret i64 %1
+}
+
+define dso_local void @tlsle_store_i64() nounwind {
+; LA32-LABEL: tlsle_store_i64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i64)
+; LA32-NEXT: st.w $zero, $a0, 4
+; LA32-NEXT: ori $a1, $zero, 1
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_i64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i64)
+; LA64-NEXT: ori $a1, $zero, 1
+; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i64)
+ store i64 1, ptr %0
+ ret void
+}
+
+ at g_f32 = dso_local thread_local(localexec) global float 0.0
+
+define dso_local float @tlsle_load_f32() nounwind {
+; LA32-LABEL: tlsle_load_f32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f32)
+; LA32-NEXT: fld.s $fa0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_f32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f32)
+; LA64-NEXT: fld.s $fa0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f32)
+ %1 = load float, ptr %0
+ ret float %1
+}
+
+define dso_local void @tlsle_store_f32() nounwind {
+; LA32-LABEL: tlsle_store_f32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f32)
+; LA32-NEXT: lu12i.w $a1, 260096
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_f32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f32)
+; LA64-NEXT: lu12i.w $a1, 260096
+; LA64-NEXT: st.w $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f32)
+ store float 1.0, ptr %0
+ ret void
+}
+
+ at g_f64 = dso_local thread_local(localexec) global double 0.0
+
+define dso_local double @tlsle_load_f64() nounwind {
+; LA32-LABEL: tlsle_load_f64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f64)
+; LA32-NEXT: fld.d $fa0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_f64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f64)
+; LA64-NEXT: fld.d $fa0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f64)
+ %1 = load double, ptr %0
+ ret double %1
+}
+
+define dso_local void @tlsle_store_f64() nounwind {
+; LA32-LABEL: tlsle_store_f64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f64)
+; LA32-NEXT: vldi $vr0, -912
+; LA32-NEXT: fst.d $fa0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_f64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f64)
+; LA64-NEXT: lu52i.d $a1, $zero, 1023
+; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f64)
+ store double 1.0, ptr %0
+ ret void
+}
+
+ at g_m64 = dso_local thread_local(localexec) global i64 0
+
+define dso_local void @tlsle_store_multi() nounwind {
+; LA32-LABEL: tlsle_store_multi:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_m64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_m64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_m64)
+; LA32-NEXT: st.w $zero, $a0, 4
+; LA32-NEXT: ori $a1, $zero, 1
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: st.w $zero, $a0, 4
+; LA32-NEXT: ori $a1, $zero, 2
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_multi:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_m64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_m64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_m64)
+; LA64-NEXT: ori $a1, $zero, 1
+; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: ori $a1, $zero, 2
+; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_m64)
+ store volatile i64 1, ptr %0
+ store volatile i64 2, ptr %0
+ ret void
+}
+
+ at g_sf32 = dso_local thread_local(localexec) global float 0.0
+
+define dso_local void @tlsle_store_sf32() nounwind {
+; LA32-LABEL: tlsle_store_sf32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_sf32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_sf32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_sf32)
+; LA32-NEXT: fld.s $fa0, $a0, 0
+; LA32-NEXT: fst.s $fa0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_sf32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_sf32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_sf32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_sf32)
+; LA64-NEXT: fld.s $fa0, $a0, 0
+; LA64-NEXT: fst.s $fa0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_sf32)
+ %1 = load float, ptr %0
+ store volatile float %1, ptr %0
+ ret void
+}
+
+ at g_sf64 = dso_local thread_local(localexec) global double 0.0
+
+define dso_local void @tlsle_store_sf64() nounwind {
+; LA32-LABEL: tlsle_store_sf64:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_sf64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_sf64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_sf64)
+; LA32-NEXT: fld.d $fa0, $a0, 0
+; LA32-NEXT: fst.d $fa0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_sf64:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_sf64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_sf64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_sf64)
+; LA64-NEXT: fld.d $fa0, $a0, 0
+; LA64-NEXT: fst.d $fa0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_sf64)
+ %1 = load double, ptr %0
+ store volatile double %1, ptr %0
+ ret void
+}
+
+ at g_i32x4_src = dso_local thread_local(localexec) global [4 x i32] zeroinitializer, align 16
+ at g_i32x4_dst = dso_local thread_local(localexec) global [4 x i32] zeroinitializer, align 16
+
+define dso_local void @tlsle_copy_i32x4() nounwind {
+; LA32-LABEL: tlsle_copy_i32x4:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_src)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x4_src)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x4_src)
+; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_dst)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x4_dst)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x4_dst)
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_copy_i32x4:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_src)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x4_src)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x4_src)
+; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_dst)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x4_dst)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x4_dst)
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x4_src)
+ %1 = load <4 x i32>, ptr %0, align 16
+ %2 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x4_dst)
+ store <4 x i32> %1, ptr %2, align 16
+ ret void
+}
+
+ at g_i32x8_src = dso_local thread_local(localexec) global [8 x i32] zeroinitializer, align 32
+ at g_i32x8_dst = dso_local thread_local(localexec) global [8 x i32] zeroinitializer, align 32
+
+define dso_local void @tlsle_copy_i32x8() nounwind {
+; LA32-LABEL: tlsle_copy_i32x8:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_src)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x8_src)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x8_src)
+; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_dst)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x8_dst)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x8_dst)
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_copy_i32x8:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_src)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x8_src)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x8_src)
+; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_dst)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x8_dst)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x8_dst)
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x8_src)
+ %1 = load <8 x i32>, ptr %0, align 32
+ %2 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x8_dst)
+ store <8 x i32> %1, ptr %2, align 32
+ ret void
+}
+
+ at g_i8x16 = dso_local thread_local(localexec) global <16 x i8> zeroinitializer, align 16
+
+define dso_local void @tlsle_copy_i8_to_i8x16() nounwind {
+; LA32-LABEL: tlsle_copy_i8_to_i8x16:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
+; LA32-NEXT: vldrepl.b $vr0, $a0, 0
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x16)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8x16)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8x16)
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_copy_i8_to_i8x16:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
+; LA64-NEXT: vldrepl.b $vr0, $a0, 0
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x16)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8x16)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8x16)
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
+ %1 = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr %0, i32 0)
+ %2 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8x16)
+ store <16 x i8> %1, ptr %2, align 16
+ ret void
+}
+
+ at g_i8x32 = dso_local thread_local(localexec) global <32 x i8> zeroinitializer, align 32
+
+define dso_local void @tlsle_copy_i8_to_i8x32() nounwind {
+; LA32-LABEL: tlsle_copy_i8_to_i8x32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
+; LA32-NEXT: xvldrepl.b $xr0, $a0, 0
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8x32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8x32)
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_copy_i8_to_i8x32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
+; LA64-NEXT: xvldrepl.b $xr0, $a0, 0
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8x32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8x32)
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
+ %1 = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr %0, i32 0)
+ %2 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8x32)
+ store <32 x i8> %1, ptr %2, align 32
+ ret void
+}
+
+ at g_rmw = dso_local thread_local(localexec) global i64 0
+
+define dso_local void @tlsle_rmw() nounwind {
+; LA32-LABEL: tlsle_rmw:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_rmw)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_rmw)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_rmw)
+; LA32-NEXT: ld.w $a1, $a0, 0
+; LA32-NEXT: ld.w $a2, $a0, 4
+; LA32-NEXT: addi.w $a1, $a1, 1
+; LA32-NEXT: sltui $a3, $a1, 1
+; LA32-NEXT: add.w $a2, $a2, $a3
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: st.w $a2, $a0, 4
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_rmw:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_rmw)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_rmw)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_rmw)
+; LA64-NEXT: ld.d $a1, $a0, 0
+; LA64-NEXT: addi.d $a1, $a1, 1
+; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_rmw)
+ %1 = load i64, ptr %0
+ %2 = add i64 %1, 1
+ store i64 %2, ptr %0
+ ret void
+}
+
+ at g_a32 = dso_local thread_local(localexec) global [2048 x i32] zeroinitializer, align 4
+
+define dso_local void @tlsle_store_a32() nounwind {
+; LA32-LABEL: tlsle_store_a32:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a32)
+; LA32-NEXT: lu12i.w $a1, 1
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: ori $a1, $zero, 1
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_a32:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a32)
+; LA64-NEXT: ori $a1, $zero, 1
+; LA64-NEXT: stptr.w $a1, $a0, 4096
+; LA64-NEXT: ret
+entry:
+ store i32 1, ptr getelementptr inbounds ([1 x i32], ptr @g_a32, i32 1024), align 4
+ ret void
+}
+
+define dso_local void @tlsle_store_a32_2() nounwind {
+; LA32-LABEL: tlsle_store_a32_2:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a32)
+; LA32-NEXT: lu12i.w $a1, 1
+; LA32-NEXT: add.w $a2, $a0, $a1
+; LA32-NEXT: ori $a3, $zero, 1
+; LA32-NEXT: st.w $a3, $a2, 0
+; LA32-NEXT: ori $a1, $a1, 8
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: ori $a1, $zero, 2
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_store_a32_2:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a32)
+; LA64-NEXT: ori $a1, $zero, 1
+; LA64-NEXT: stptr.w $a1, $a0, 4096
+; LA64-NEXT: ori $a1, $zero, 2
+; LA64-NEXT: stptr.w $a1, $a0, 4104
+; LA64-NEXT: ret
+entry:
+ store i32 1, ptr getelementptr inbounds ([1 x i32], ptr @g_a32, i32 1024), align 4
+ store i32 2, ptr getelementptr inbounds ([1 x i32], ptr @g_a32, i32 1026), align 4
+ ret void
+}
+
+define dso_local void @tlsle_control_flow_with_mem_access() nounwind {
+; LA32-LABEL: tlsle_control_flow_with_mem_access:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a32)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a32)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a32)
+; LA32-NEXT: ld.w $a1, $a0, 4
+; LA32-NEXT: ori $a2, $zero, 1
+; LA32-NEXT: blt $a1, $a2, .LBB25_2
+; LA32-NEXT: # %bb.1: # %if.then
+; LA32-NEXT: ori $a1, $zero, 10
+; LA32-NEXT: st.w $a1, $a0, 4
+; LA32-NEXT: .LBB25_2: # %if.end
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_control_flow_with_mem_access:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a32)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a32)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a32)
+; LA64-NEXT: ld.w $a1, $a0, 4
+; LA64-NEXT: ori $a2, $zero, 1
+; LA64-NEXT: blt $a1, $a2, .LBB25_2
+; LA64-NEXT: # %bb.1: # %if.then
+; LA64-NEXT: ori $a1, $zero, 10
+; LA64-NEXT: st.w $a1, $a0, 4
+; LA64-NEXT: .LBB25_2: # %if.end
+; LA64-NEXT: ret
+entry:
+ %0 = load i32, ptr getelementptr inbounds ([1 x i32], ptr @g_a32, i32 1), align 4
+ %cmp = icmp sgt i32 %0, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i32 10, ptr getelementptr inbounds ([1 x i32], ptr @g_a32, i32 1), align 4
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+ at g_a64 = dso_local thread_local(localexec) global [614750729487779976 x i64] zeroinitializer, align 8
+
+define dso_local ptr @tlsle_load_addr_offset_1() nounwind {
+; LA32-LABEL: tlsle_load_addr_offset_1:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
+; LA32-NEXT: addi.w $a0, $a0, 8
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_addr_offset_1:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
+; LA64-NEXT: addi.d $a0, $a0, 8
+; LA64-NEXT: ret
+entry:
+ ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 1)
+}
+
+define dso_local ptr @tlsle_load_addr_offset_257() nounwind {
+; LA32-LABEL: tlsle_load_addr_offset_257:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
+; LA32-NEXT: addi.w $a0, $a0, 2047
+; LA32-NEXT: addi.w $a0, $a0, 9
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_addr_offset_257:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
+; LA64-NEXT: addi.d $a0, $a0, 2047
+; LA64-NEXT: addi.d $a0, $a0, 9
+; LA64-NEXT: ret
+entry:
+ ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 257)
+}
+
+define dso_local ptr @tlsle_load_addr_offset_1048576() nounwind {
+; LA32-LABEL: tlsle_load_addr_offset_1048576:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
+; LA32-NEXT: lu12i.w $a1, 2048
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_addr_offset_1048576:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
+; LA64-NEXT: addu16i.d $a0, $a0, 128
+; LA64-NEXT: ret
+entry:
+ ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 1048576)
+}
+
+define dso_local ptr @tlsle_load_addr_offset_1048577() nounwind {
+; LA32-LABEL: tlsle_load_addr_offset_1048577:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
+; LA32-NEXT: lu12i.w $a1, 2048
+; LA32-NEXT: ori $a1, $a1, 8
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_addr_offset_1048577:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
+; LA64-NEXT: addu16i.d $a0, $a0, 128
+; LA64-NEXT: addi.d $a0, $a0, 8
+; LA64-NEXT: ret
+entry:
+ ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 1048577)
+}
+
+define dso_local ptr @tlsle_load_addr_offset_268432896() nounwind {
+; LA32-LABEL: tlsle_load_addr_offset_268432896:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
+; LA32-NEXT: lu12i.w $a1, 524283
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_addr_offset_268432896:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
+; LA64-NEXT: lu12i.w $a1, 524283
+; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: ret
+entry:
+ ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 268432896)
+}
+
+define dso_local ptr @tlsle_load_addr_offset_268432897() nounwind {
+; LA32-LABEL: tlsle_load_addr_offset_268432897:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
+; LA32-NEXT: lu12i.w $a1, 524283
+; LA32-NEXT: ori $a1, $a1, 8
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_addr_offset_268432897:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
+; LA64-NEXT: lu12i.w $a1, 524283
+; LA64-NEXT: ori $a1, $a1, 8
+; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: ret
+entry:
+ ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 268432897)
+}
+
+define dso_local ptr @tlsle_load_addr_offset_281474439839744() nounwind {
+; LA32-LABEL: tlsle_load_addr_offset_281474439839744:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_addr_offset_281474439839744:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
+; LA64-NEXT: ori $a1, $zero, 0
+; LA64-NEXT: lu32i.d $a1, 524287
+; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: ret
+entry:
+ ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 281474439839744)
+}
+
+define dso_local ptr @tlsle_load_addr_offset_248792680471040() nounwind {
+; LA32-LABEL: tlsle_load_addr_offset_248792680471040:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
+; LA32-NEXT: lu12i.w $a1, 502733
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_addr_offset_248792680471040:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
+; LA64-NEXT: lu12i.w $a1, 502733
+; LA64-NEXT: lu32i.d $a1, 463412
+; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: ret
+entry:
+ ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 248792680471040)
+}
+
+define dso_local ptr @tlsle_load_addr_offset_9380351707272() nounwind {
+; LA32-LABEL: tlsle_load_addr_offset_9380351707272:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
+; LA32-NEXT: lu12i.w $a1, 279556
+; LA32-NEXT: ori $a1, $a1, 1088
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_addr_offset_9380351707272:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
+; LA64-NEXT: lu12i.w $a1, 279556
+; LA64-NEXT: ori $a1, $a1, 1088
+; LA64-NEXT: lu32i.d $a1, 17472
+; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: ret
+entry:
+ ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 9380351707272)
+}
+
+define dso_local ptr @tlsle_load_addr_offset_562949953421312() nounwind {
+; LA32-LABEL: tlsle_load_addr_offset_562949953421312:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_addr_offset_562949953421312:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
+; LA64-NEXT: lu52i.d $a1, $zero, 1
+; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: ret
+entry:
+ ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 562949953421312)
+}
+
+define dso_local ptr @tlsle_load_addr_offset_614749556925924693() nounwind {
+; LA32-LABEL: tlsle_load_addr_offset_614749556925924693:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
+; LA32-NEXT: lu12i.w $a1, 209666
+; LA32-NEXT: ori $a1, $a1, 2728
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: tlsle_load_addr_offset_614749556925924693:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
+; LA64-NEXT: lu12i.w $a1, 209666
+; LA64-NEXT: ori $a1, $a1, 2728
+; LA64-NEXT: lu32i.d $a1, 15288
+; LA64-NEXT: lu52i.d $a1, $a1, 1092
+; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: ret
+entry:
+ ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 614749556925924693)
+}
+
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
index 9df5532d51179ec..2af206699d4ad76 100644
--- a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
+++ b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
@@ -257,8 +257,8 @@ entry:
@g_i64 = dso_local global i64 0
-define dso_local i64 @load_64() nounwind {
-; LA32-LABEL: load_64:
+define dso_local i64 @load_i64() nounwind {
+; LA32-LABEL: load_i64:
; LA32: # %bb.0: # %entry
; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i64)
; LA32-NEXT: addi.w $a1, $a0, %pc_lo12(g_i64)
@@ -266,13 +266,13 @@ define dso_local i64 @load_64() nounwind {
; LA32-NEXT: ld.w $a1, $a1, 4
; LA32-NEXT: ret
;
-; LA64-LABEL: load_64:
+; LA64-LABEL: load_i64:
; LA64: # %bb.0: # %entry
; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i64)
; LA64-NEXT: ld.d $a0, $a0, %pc_lo12(g_i64)
; LA64-NEXT: ret
;
-; LA64-LARGE-LABEL: load_64:
+; LA64-LARGE-LABEL: load_i64:
; LA64-LARGE: # %bb.0: # %entry
; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i64)
; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i64)
@@ -580,36 +580,36 @@ entry:
define dso_local void @copy_i32x8() nounwind {
; LA32-LABEL: copy_i32x8:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src)
-; LA32-NEXT: xvld $xr0, $a0, %pc_lo12(g_i32x4_src)
-; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst)
-; LA32-NEXT: xvst $xr0, $a0, %pc_lo12(g_i32x4_dst)
+; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x8_src)
+; LA32-NEXT: xvld $xr0, $a0, %pc_lo12(g_i32x8_src)
+; LA32-NEXT: pcalau12i $a0, %pc_hi20(g_i32x8_dst)
+; LA32-NEXT: xvst $xr0, $a0, %pc_lo12(g_i32x8_dst)
; LA32-NEXT: ret
;
; LA64-LABEL: copy_i32x8:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src)
-; LA64-NEXT: xvld $xr0, $a0, %pc_lo12(g_i32x4_src)
-; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst)
-; LA64-NEXT: xvst $xr0, $a0, %pc_lo12(g_i32x4_dst)
+; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x8_src)
+; LA64-NEXT: xvld $xr0, $a0, %pc_lo12(g_i32x8_src)
+; LA64-NEXT: pcalau12i $a0, %pc_hi20(g_i32x8_dst)
+; LA64-NEXT: xvst $xr0, $a0, %pc_lo12(g_i32x8_dst)
; LA64-NEXT: ret
;
; LA64-LARGE-LABEL: copy_i32x8:
; LA64-LARGE: # %bb.0: # %entry
-; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_src)
-; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x4_src)
-; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x4_src)
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x4_src)
+; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i32x8_src)
+; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x8_src)
+; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x8_src)
+; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x8_src)
; LA64-LARGE-NEXT: xvldx $xr0, $a1, $a0
-; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i32x4_dst)
-; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x4_dst)
-; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x4_dst)
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x4_dst)
+; LA64-LARGE-NEXT: pcalau12i $a0, %pc_hi20(g_i32x8_dst)
+; LA64-LARGE-NEXT: addi.d $a1, $zero, %pc_lo12(g_i32x8_dst)
+; LA64-LARGE-NEXT: lu32i.d $a1, %pc64_lo20(g_i32x8_dst)
+; LA64-LARGE-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g_i32x8_dst)
; LA64-LARGE-NEXT: xvstx $xr0, $a1, $a0
; LA64-LARGE-NEXT: ret
entry:
- %0 = load <8 x i32>, ptr @g_i32x4_src, align 32
- store <8 x i32> %0, ptr @g_i32x4_dst, align 32
+ %0 = load <8 x i32>, ptr @g_i32x8_src, align 32
+ store <8 x i32> %0, ptr @g_i32x8_dst, align 32
ret void
}
>From e37c2933125a65e627949557fba7a606d41db716 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Tue, 14 Jan 2025 21:35:31 +0800
Subject: [PATCH 5/8] [LoongArch] Merge base and offset for tls-le code
sequence
Adapt the merge base offset pass to optimize the tls-le
code sequence.
---
.../LoongArch/LoongArchMergeBaseOffset.cpp | 165 ++++++++-
.../LoongArch/machinelicm-address-pseudos.ll | 6 +-
.../LoongArch/merge-base-offset-tlsle.ll | 318 +++++++-----------
3 files changed, 266 insertions(+), 223 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
index 7f98f7718a538d6..2aae498e1f2de2b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
@@ -37,6 +37,8 @@ class LoongArchMergeBaseOffsetOpt : public MachineFunctionPass {
bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Lo12,
MachineInstr *&Lo20, MachineInstr *&Hi12,
MachineInstr *&Last);
+ bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Add,
+ MachineInstr *&Lo12);
bool detectAndFoldOffset(MachineInstr &Hi20, MachineInstr &Lo12,
MachineInstr *&Lo20, MachineInstr *&Hi12,
@@ -176,7 +178,80 @@ bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
return true;
}
-// Update the offset in Hi20, Lo12, Lo20 and Hi12 instructions.
+// Detect the pattern:
+//
+// (small/medium):
+// lu12i.w vreg1, %le_hi20_r(s)
+// add.w/d vreg2, vreg1, r2, %le_add_r(s)
+// addi.w/d vreg3, vreg2, %le_lo12_r(s)
+
+// The pattern is only accepted if:
+// 1) The first instruction has only one use, which is the PseudoAddTPRel.
+// The second instruction has only one use, which is the ADDI. The
+// second instruction's last operand is the tp register.
+// 2) The address operands have the appropriate type, reflecting the
+// lowering of a thread_local global address using the pattern.
+// 3) The offset value in the ThreadLocal Global Address is 0.
+bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
+ MachineInstr *&Add,
+ MachineInstr *&Lo12) {
+ if (Hi20.getOpcode() != LoongArch::LU12I_W)
+ return false;
+
+ auto isGlobalOrCPI = [](const MachineOperand &Op) {
+ return Op.isGlobal() || Op.isCPI();
+ };
+
+ const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
+ if (LoongArchII::getDirectFlags(Hi20Op1) != LoongArchII::MO_LE_HI_R ||
+ !isGlobalOrCPI(Hi20Op1) || Hi20Op1.getOffset() != 0)
+ return false;
+
+ Register HiDestReg = Hi20.getOperand(0).getReg();
+ if (!MRI->hasOneUse(HiDestReg))
+ return false;
+
+ Add = &*MRI->use_instr_begin(HiDestReg);
+ if ((ST->is64Bit() && Add->getOpcode() != LoongArch::PseudoAddTPRel_D) ||
+ (!ST->is64Bit() && Add->getOpcode() != LoongArch::PseudoAddTPRel_W))
+ return false;
+
+ if (Add->getOperand(2).getReg() != LoongArch::R2)
+ return false;
+
+ const MachineOperand &AddOp3 = Add->getOperand(3);
+ if (LoongArchII::getDirectFlags(AddOp3) != LoongArchII::MO_LE_ADD_R ||
+ !(isGlobalOrCPI(AddOp3) || AddOp3.isMCSymbol()) ||
+ AddOp3.getOffset() != 0)
+ return false;
+
+ Register AddDestReg = Add->getOperand(0).getReg();
+ if (!MRI->hasOneUse(AddDestReg))
+ return false;
+
+ Lo12 = &*MRI->use_instr_begin(AddDestReg);
+ if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
+ (!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
+ return false;
+
+ const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
+ if (LoongArchII::getDirectFlags(Lo12Op2) != LoongArchII::MO_LE_LO_R ||
+ !(isGlobalOrCPI(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
+ Lo12Op2.getOffset() != 0)
+ return false;
+
+ if (Hi20Op1.isGlobal()) {
+ LLVM_DEBUG(dbgs() << " Found lowered global address: "
+ << *Hi20Op1.getGlobal() << "\n");
+ } else if (Hi20Op1.isCPI()) {
+ LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << Hi20Op1.getIndex()
+ << "\n");
+ }
+
+ return true;
+}
+
+// Update the offset in Hi20, (Add), Lo12, (Lo20 and Hi12) instructions.
// Delete the tail instruction and update all the uses to use the
// output from Last.
void LoongArchMergeBaseOffsetOpt::foldOffset(
@@ -190,31 +265,49 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
Lo20->getOperand(2).setOffset(Offset);
Hi12->getOperand(2).setOffset(Offset);
}
+
+ // For tls-le, offset of the second PseudoAddTPRel instr should also be
+ // updated.
+ MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
+ if (Hi20.getOpcode() == LoongArch::LU12I_W)
+ Add->getOperand(3).setOffset(Offset);
+
// Delete the tail instruction.
MachineInstr *Def = Last ? Last : &Lo12;
MRI->constrainRegClass(Def->getOperand(0).getReg(),
MRI->getRegClass(Tail.getOperand(0).getReg()));
MRI->replaceRegWith(Tail.getOperand(0).getReg(), Def->getOperand(0).getReg());
Tail.eraseFromParent();
+
LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
- << " " << Hi20 << " " << Lo12;);
+ << " " << Hi20;);
+ if (Hi20.getOpcode() == LoongArch::LU12I_W) {
+ LLVM_DEBUG(dbgs() << " " << *Add;);
+ }
+ LLVM_DEBUG(dbgs() << " " << Lo12;);
if (Lo20 && Hi12) {
LLVM_DEBUG(dbgs() << " " << *Lo20 << " " << *Hi12;);
}
}
// Detect patterns for large offsets that are passed into an ADD instruction.
-// If the pattern is found, updates the offset in Hi20, Lo12, Lo20 and Hi12
-// instructions and deletes TailAdd and the instructions that produced the
-// offset.
+// If the pattern is found, updates the offset in Hi20, (Add), Lo12,
+// (Lo20 and Hi12) instructions and deletes TailAdd and the instructions that
+// produced the offset.
//
// (The instructions marked with "!" are not necessarily present)
//
// Base address lowering is of the form:
-// Hi20: pcalau12i vreg1, %pc_hi20(s)
-// +- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
-// | Lo20: lu32i.d vreg2, %pc64_lo20(s) !
-// +- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
+// 1) pcala:
+// Hi20: pcalau12i vreg1, %pc_hi20(s)
+// +--- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
+// | Lo20: lu32i.d vreg2, %pc64_lo20(s) !
+// +--- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
+// |
+// | 2) tls-le:
+// | Hi20: lu12i.w vreg1, %le_hi20_r(s)
+// | Add: add.w/d vreg1, vreg1, r2, %le_add_r(s)
+// +--- Lo12: addi.w/d vreg2, vreg1, %le_lo12_r(s)
// |
// | The large offset can be one of the forms:
// |
@@ -334,7 +427,8 @@ bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
// Look for arithmetic instructions we can get an offset from.
// We might be able to remove the arithmetic instructions by folding the
- // offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I).
+ // offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I) or
+ // LU12I_W+PseudoAddTPRel+ADDI.
if (!MRI->hasOneUse(DestReg))
return false;
@@ -454,6 +548,7 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
// If all the uses are memory ops with the same offset, we can transform:
//
// 1. (small/medium):
+ // 1.1. pcala
// pcalau12i vreg1, %pc_hi20(s)
// addi.d vreg2, vreg1, %pc_lo12(s)
// ld.w vreg3, 8(vreg2)
@@ -463,6 +558,18 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
// pcalau12i vreg1, %pc_hi20(s+8)
// ld.w vreg3, vreg1, %pc_lo12(s+8)(vreg1)
//
+ // 1.2. tls-le
+ // lu12i.w vreg1, %le_hi20_r(s)
+ // add.w/d vreg2, vreg1, r2, %le_add_r(s)
+ // addi.w/d vreg3, vreg2, %le_lo12_r(s)
+ // ld.w vreg4, 8(vreg3)
+ //
+ // =>
+ //
+ // lu12i.w vreg1, %le_hi20_r(s+8)
+ // add.w/d vreg2, vreg1, r2, %le_add_r(s+8)
+ // ld.w vreg4, vreg2, %le_lo12_r(s+8)(vreg2)
+ //
// 2. (large):
// pcalau12i vreg1, %pc_hi20(s)
// addi.d vreg2, $zero, %pc_lo12(s)
@@ -598,7 +705,8 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
return false;
// If optimized by this pass successfully, MO_RELAX bitmask target-flag should
- // be removed from the code sequence.
+ // be removed from the pcala code sequence. Code sequence of tls-le can still
+ // be relaxed after being optimized.
//
// For example:
// pcalau12i $a0, %pc_hi20(symbol)
@@ -614,15 +722,20 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
// optimized, it cannot be relaxed any more. So MO_RELAX flag should not be
// carried by them.
Hi20.getOperand(1).setOffset(NewOffset);
- Hi20.getOperand(1).setTargetFlags(
- LoongArchII::getDirectFlags(Hi20.getOperand(1)));
MachineOperand &ImmOp = Lo12.getOperand(2);
ImmOp.setOffset(NewOffset);
- ImmOp.setTargetFlags(LoongArchII::getDirectFlags(ImmOp));
if (Lo20 && Hi12) {
Lo20->getOperand(2).setOffset(NewOffset);
Hi12->getOperand(2).setOffset(NewOffset);
}
+ if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
+ Hi20.getOperand(1).setTargetFlags(
+ LoongArchII::getDirectFlags(Hi20.getOperand(1)));
+ ImmOp.setTargetFlags(LoongArchII::getDirectFlags(ImmOp));
+ } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
+ MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
+ Add->getOperand(3).setOffset(NewOffset);
+ }
// Update the immediate in the load/store instructions to add the offset.
const LoongArchInstrInfo &TII = *ST->getInstrInfo();
@@ -673,7 +786,14 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
return true;
}
- MRI->replaceRegWith(Lo12.getOperand(0).getReg(), Hi20.getOperand(0).getReg());
+ if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
+ MRI->replaceRegWith(Lo12.getOperand(0).getReg(),
+ Hi20.getOperand(0).getReg());
+ } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
+ MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
+ MRI->replaceRegWith(Lo12.getOperand(0).getReg(),
+ Add->getOperand(0).getReg());
+ }
Lo12.eraseFromParent();
return true;
}
@@ -693,8 +813,21 @@ bool LoongArchMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
MachineInstr *Lo20 = nullptr;
MachineInstr *Hi12 = nullptr;
MachineInstr *Last = nullptr;
- if (!detectFoldable(Hi20, Lo12, Lo20, Hi12, Last))
+ if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
+ // Detect foldable pcala code sequence in small/medium/large code model.
+ if (!detectFoldable(Hi20, Lo12, Lo20, Hi12, Last))
+ continue;
+ } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
+ MachineInstr *Add = nullptr;
+ // Detect foldable tls-le code sequence in small/medium code model.
+ if (!detectFoldable(Hi20, Add, Lo12))
+ continue;
+ } else {
continue;
+ }
+ // For tls-le, we do not pass the second PseudoAddTPRel instr in order to
+  // reuse the existing hooks, and the last three parameters should always be
+ // nullptr.
MadeChange |= detectAndFoldOffset(Hi20, *Lo12, Lo20, Hi12, Last);
MadeChange |= foldIntoMemoryOps(Hi20, *Lo12, Lo20, Hi12, Last);
}
diff --git a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
index e0a93e3051bf88c..92d079ab3a8d870 100644
--- a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
+++ b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
@@ -317,11 +317,10 @@ define void @test_la_tls_le(i32 signext %n) {
; LA32-NEXT: move $a1, $zero
; LA32-NEXT: lu12i.w $a2, %le_hi20_r(le)
; LA32-NEXT: add.w $a2, $a2, $tp, %le_add_r(le)
-; LA32-NEXT: addi.w $a2, $a2, %le_lo12_r(le)
; LA32-NEXT: .p2align 4, , 16
; LA32-NEXT: .LBB4_1: # %loop
; LA32-NEXT: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT: ld.w $zero, $a2, 0
+; LA32-NEXT: ld.w $zero, $a2, %le_lo12_r(le)
; LA32-NEXT: addi.w $a1, $a1, 1
; LA32-NEXT: blt $a1, $a0, .LBB4_1
; LA32-NEXT: # %bb.2: # %ret
@@ -332,11 +331,10 @@ define void @test_la_tls_le(i32 signext %n) {
; LA64-NEXT: move $a1, $zero
; LA64-NEXT: lu12i.w $a2, %le_hi20_r(le)
; LA64-NEXT: add.d $a2, $a2, $tp, %le_add_r(le)
-; LA64-NEXT: addi.d $a2, $a2, %le_lo12_r(le)
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB4_1: # %loop
; LA64-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ld.w $zero, $a2, 0
+; LA64-NEXT: ld.w $zero, $a2, %le_lo12_r(le)
; LA64-NEXT: addi.w $a1, $a1, 1
; LA64-NEXT: blt $a1, $a0, .LBB4_1
; LA64-NEXT: # %bb.2: # %ret
diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
index 7e995d224ce1d22..9ed9a865ce55d40 100644
--- a/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
+++ b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
@@ -11,16 +11,14 @@ define dso_local signext i8 @tlsle_load_s8() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
-; LA32-NEXT: ld.b $a0, $a0, 0
+; LA32-NEXT: ld.b $a0, $a0, %le_lo12_r(g_i8)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_s8:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
-; LA64-NEXT: ld.b $a0, $a0, 0
+; LA64-NEXT: ld.b $a0, $a0, %le_lo12_r(g_i8)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
@@ -33,16 +31,14 @@ define dso_local zeroext i8 @tlsle_load_u8() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
-; LA32-NEXT: ld.bu $a0, $a0, 0
+; LA32-NEXT: ld.bu $a0, $a0, %le_lo12_r(g_i8)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_u8:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
-; LA64-NEXT: ld.bu $a0, $a0, 0
+; LA64-NEXT: ld.bu $a0, $a0, %le_lo12_r(g_i8)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
@@ -55,18 +51,16 @@ define dso_local void @tlsle_store_i8() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
; LA32-NEXT: ori $a1, $zero, 1
-; LA32-NEXT: st.b $a1, $a0, 0
+; LA32-NEXT: st.b $a1, $a0, %le_lo12_r(g_i8)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_i8:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
; LA64-NEXT: ori $a1, $zero, 1
-; LA64-NEXT: st.b $a1, $a0, 0
+; LA64-NEXT: st.b $a1, $a0, %le_lo12_r(g_i8)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
@@ -81,16 +75,14 @@ define dso_local signext i16 @tlsle_load_s16() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i16)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i16)
-; LA32-NEXT: ld.h $a0, $a0, 0
+; LA32-NEXT: ld.h $a0, $a0, %le_lo12_r(g_i16)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_s16:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i16)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i16)
-; LA64-NEXT: ld.h $a0, $a0, 0
+; LA64-NEXT: ld.h $a0, $a0, %le_lo12_r(g_i16)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
@@ -103,16 +95,14 @@ define dso_local zeroext i16 @tlsle_load_u16() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i16)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i16)
-; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: ld.hu $a0, $a0, %le_lo12_r(g_i16)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_u16:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i16)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i16)
-; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: ld.hu $a0, $a0, %le_lo12_r(g_i16)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
@@ -125,18 +115,16 @@ define dso_local void @tlsle_store_i16() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i16)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i16)
; LA32-NEXT: ori $a1, $zero, 1
-; LA32-NEXT: st.h $a1, $a0, 0
+; LA32-NEXT: st.h $a1, $a0, %le_lo12_r(g_i16)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_i16:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i16)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i16)
; LA64-NEXT: ori $a1, $zero, 1
-; LA64-NEXT: st.h $a1, $a0, 0
+; LA64-NEXT: st.h $a1, $a0, %le_lo12_r(g_i16)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
@@ -151,16 +139,14 @@ define dso_local signext i32 @tlsle_load_s32() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32)
-; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, %le_lo12_r(g_i32)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_s32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32)
-; LA64-NEXT: ld.w $a0, $a0, 0
+; LA64-NEXT: ld.w $a0, $a0, %le_lo12_r(g_i32)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
@@ -173,16 +159,14 @@ define dso_local zeroext i32 @tlsle_load_u32() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32)
-; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, %le_lo12_r(g_i32)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_u32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32)
-; LA64-NEXT: ld.wu $a0, $a0, 0
+; LA64-NEXT: ld.wu $a0, $a0, %le_lo12_r(g_i32)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
@@ -195,18 +179,16 @@ define dso_local void @tlsle_store_i32() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32)
; LA32-NEXT: ori $a1, $zero, 1
-; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: st.w $a1, $a0, %le_lo12_r(g_i32)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_i32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32)
; LA64-NEXT: ori $a1, $zero, 1
-; LA64-NEXT: st.w $a1, $a0, 0
+; LA64-NEXT: st.w $a1, $a0, %le_lo12_r(g_i32)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
@@ -230,8 +212,7 @@ define dso_local i64 @tlsle_load_i64() nounwind {
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i64)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i64)
-; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: ld.d $a0, $a0, %le_lo12_r(g_i64)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i64)
@@ -254,9 +235,8 @@ define dso_local void @tlsle_store_i64() nounwind {
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i64)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i64)
; LA64-NEXT: ori $a1, $zero, 1
-; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: st.d $a1, $a0, %le_lo12_r(g_i64)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i64)
@@ -271,16 +251,14 @@ define dso_local float @tlsle_load_f32() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f32)
-; LA32-NEXT: fld.s $fa0, $a0, 0
+; LA32-NEXT: fld.s $fa0, $a0, %le_lo12_r(g_f32)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_f32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f32)
-; LA64-NEXT: fld.s $fa0, $a0, 0
+; LA64-NEXT: fld.s $fa0, $a0, %le_lo12_r(g_f32)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f32)
@@ -293,18 +271,16 @@ define dso_local void @tlsle_store_f32() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f32)
; LA32-NEXT: lu12i.w $a1, 260096
-; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: st.w $a1, $a0, %le_lo12_r(g_f32)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_f32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f32)
; LA64-NEXT: lu12i.w $a1, 260096
-; LA64-NEXT: st.w $a1, $a0, 0
+; LA64-NEXT: st.w $a1, $a0, %le_lo12_r(g_f32)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f32)
@@ -319,16 +295,14 @@ define dso_local double @tlsle_load_f64() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f64)
-; LA32-NEXT: fld.d $fa0, $a0, 0
+; LA32-NEXT: fld.d $fa0, $a0, %le_lo12_r(g_f64)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_f64:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f64)
-; LA64-NEXT: fld.d $fa0, $a0, 0
+; LA64-NEXT: fld.d $fa0, $a0, %le_lo12_r(g_f64)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f64)
@@ -341,18 +315,16 @@ define dso_local void @tlsle_store_f64() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f64)
; LA32-NEXT: vldi $vr0, -912
-; LA32-NEXT: fst.d $fa0, $a0, 0
+; LA32-NEXT: fst.d $fa0, $a0, %le_lo12_r(g_f64)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_f64:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f64)
; LA64-NEXT: lu52i.d $a1, $zero, 1023
-; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: st.d $a1, $a0, %le_lo12_r(g_f64)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f64)
@@ -380,11 +352,10 @@ define dso_local void @tlsle_store_multi() nounwind {
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_m64)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_m64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_m64)
; LA64-NEXT: ori $a1, $zero, 1
-; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: st.d $a1, $a0, %le_lo12_r(g_m64)
; LA64-NEXT: ori $a1, $zero, 2
-; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: st.d $a1, $a0, %le_lo12_r(g_m64)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_m64)
@@ -400,18 +371,16 @@ define dso_local void @tlsle_store_sf32() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_sf32)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_sf32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_sf32)
-; LA32-NEXT: fld.s $fa0, $a0, 0
-; LA32-NEXT: fst.s $fa0, $a0, 0
+; LA32-NEXT: fld.s $fa0, $a0, %le_lo12_r(g_sf32)
+; LA32-NEXT: fst.s $fa0, $a0, %le_lo12_r(g_sf32)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_sf32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_sf32)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_sf32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_sf32)
-; LA64-NEXT: fld.s $fa0, $a0, 0
-; LA64-NEXT: fst.s $fa0, $a0, 0
+; LA64-NEXT: fld.s $fa0, $a0, %le_lo12_r(g_sf32)
+; LA64-NEXT: fst.s $fa0, $a0, %le_lo12_r(g_sf32)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_sf32)
@@ -427,18 +396,16 @@ define dso_local void @tlsle_store_sf64() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_sf64)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_sf64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_sf64)
-; LA32-NEXT: fld.d $fa0, $a0, 0
-; LA32-NEXT: fst.d $fa0, $a0, 0
+; LA32-NEXT: fld.d $fa0, $a0, %le_lo12_r(g_sf64)
+; LA32-NEXT: fst.d $fa0, $a0, %le_lo12_r(g_sf64)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_sf64:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_sf64)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_sf64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_sf64)
-; LA64-NEXT: fld.d $fa0, $a0, 0
-; LA64-NEXT: fst.d $fa0, $a0, 0
+; LA64-NEXT: fld.d $fa0, $a0, %le_lo12_r(g_sf64)
+; LA64-NEXT: fst.d $fa0, $a0, %le_lo12_r(g_sf64)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_sf64)
@@ -455,24 +422,20 @@ define dso_local void @tlsle_copy_i32x4() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_src)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x4_src)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x4_src)
-; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vld $vr0, $a0, %le_lo12_r(g_i32x4_src)
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_dst)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x4_dst)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x4_dst)
-; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: vst $vr0, $a0, %le_lo12_r(g_i32x4_dst)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_copy_i32x4:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_src)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x4_src)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x4_src)
-; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vld $vr0, $a0, %le_lo12_r(g_i32x4_src)
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_dst)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x4_dst)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x4_dst)
-; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: vst $vr0, $a0, %le_lo12_r(g_i32x4_dst)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x4_src)
@@ -490,24 +453,20 @@ define dso_local void @tlsle_copy_i32x8() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_src)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x8_src)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x8_src)
-; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvld $xr0, $a0, %le_lo12_r(g_i32x8_src)
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_dst)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x8_dst)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x8_dst)
-; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: xvst $xr0, $a0, %le_lo12_r(g_i32x8_dst)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_copy_i32x8:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_src)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x8_src)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x8_src)
-; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvld $xr0, $a0, %le_lo12_r(g_i32x8_src)
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_dst)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x8_dst)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x8_dst)
-; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: xvst $xr0, $a0, %le_lo12_r(g_i32x8_dst)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x8_src)
@@ -524,24 +483,20 @@ define dso_local void @tlsle_copy_i8_to_i8x16() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
-; LA32-NEXT: vldrepl.b $vr0, $a0, 0
+; LA32-NEXT: vldrepl.b $vr0, $a0, %le_lo12_r(g_i8)
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x16)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8x16)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8x16)
-; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: vst $vr0, $a0, %le_lo12_r(g_i8x16)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_copy_i8_to_i8x16:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
-; LA64-NEXT: vldrepl.b $vr0, $a0, 0
+; LA64-NEXT: vldrepl.b $vr0, $a0, %le_lo12_r(g_i8)
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x16)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8x16)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8x16)
-; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: vst $vr0, $a0, %le_lo12_r(g_i8x16)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
@@ -558,24 +513,20 @@ define dso_local void @tlsle_copy_i8_to_i8x32() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
-; LA32-NEXT: xvldrepl.b $xr0, $a0, 0
+; LA32-NEXT: xvldrepl.b $xr0, $a0, %le_lo12_r(g_i8)
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x32)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8x32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8x32)
-; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: xvst $xr0, $a0, %le_lo12_r(g_i8x32)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_copy_i8_to_i8x32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
-; LA64-NEXT: xvldrepl.b $xr0, $a0, 0
+; LA64-NEXT: xvldrepl.b $xr0, $a0, %le_lo12_r(g_i8)
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x32)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8x32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8x32)
-; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: xvst $xr0, $a0, %le_lo12_r(g_i8x32)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
@@ -606,10 +557,9 @@ define dso_local void @tlsle_rmw() nounwind {
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_rmw)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_rmw)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_rmw)
-; LA64-NEXT: ld.d $a1, $a0, 0
+; LA64-NEXT: ld.d $a1, $a0, %le_lo12_r(g_rmw)
; LA64-NEXT: addi.d $a1, $a1, 1
-; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: st.d $a1, $a0, %le_lo12_r(g_rmw)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_rmw)
@@ -624,22 +574,18 @@ entry:
define dso_local void @tlsle_store_a32() nounwind {
; LA32-LABEL: tlsle_store_a32:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a32)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a32)
-; LA32-NEXT: lu12i.w $a1, 1
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a32+4096)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a32+4096)
; LA32-NEXT: ori $a1, $zero, 1
-; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: st.w $a1, $a0, %le_lo12_r(g_a32+4096)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_a32:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a32)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a32)
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a32+4096)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a32+4096)
; LA64-NEXT: ori $a1, $zero, 1
-; LA64-NEXT: stptr.w $a1, $a0, 4096
+; LA64-NEXT: st.w $a1, $a0, %le_lo12_r(g_a32+4096)
; LA64-NEXT: ret
entry:
store i32 1, ptr getelementptr inbounds ([1 x i32], ptr @g_a32, i32 1024), align 4
@@ -681,29 +627,27 @@ entry:
define dso_local void @tlsle_control_flow_with_mem_access() nounwind {
; LA32-LABEL: tlsle_control_flow_with_mem_access:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a32)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a32)
-; LA32-NEXT: ld.w $a1, $a0, 4
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a32+4)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a32+4)
+; LA32-NEXT: ld.w $a1, $a0, %le_lo12_r(g_a32+4)
; LA32-NEXT: ori $a2, $zero, 1
; LA32-NEXT: blt $a1, $a2, .LBB25_2
; LA32-NEXT: # %bb.1: # %if.then
; LA32-NEXT: ori $a1, $zero, 10
-; LA32-NEXT: st.w $a1, $a0, 4
+; LA32-NEXT: st.w $a1, $a0, %le_lo12_r(g_a32+4)
; LA32-NEXT: .LBB25_2: # %if.end
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_control_flow_with_mem_access:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a32)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a32)
-; LA64-NEXT: ld.w $a1, $a0, 4
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a32+4)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a32+4)
+; LA64-NEXT: ld.w $a1, $a0, %le_lo12_r(g_a32+4)
; LA64-NEXT: ori $a2, $zero, 1
; LA64-NEXT: blt $a1, $a2, .LBB25_2
; LA64-NEXT: # %bb.1: # %if.then
; LA64-NEXT: ori $a1, $zero, 10
-; LA64-NEXT: st.w $a1, $a0, 4
+; LA64-NEXT: st.w $a1, $a0, %le_lo12_r(g_a32+4)
; LA64-NEXT: .LBB25_2: # %if.end
; LA64-NEXT: ret
entry:
@@ -724,18 +668,16 @@ if.end:
define dso_local ptr @tlsle_load_addr_offset_1() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_1:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, 8
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+8)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+8)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+8)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_1:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, 8
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+8)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64+8)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64+8)
; LA64-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 1)
@@ -744,20 +686,16 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_257() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_257:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, 2047
-; LA32-NEXT: addi.w $a0, $a0, 9
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+2056)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+2056)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+2056)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_257:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, 2047
-; LA64-NEXT: addi.d $a0, $a0, 9
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+2056)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64+2056)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64+2056)
; LA64-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 257)
@@ -766,19 +704,16 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_1048576() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_1048576:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: lu12i.w $a1, 2048
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+8388608)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+8388608)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+8388608)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_1048576:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT: addu16i.d $a0, $a0, 128
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+8388608)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64+8388608)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64+8388608)
; LA64-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 1048576)
@@ -787,21 +722,16 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_1048577() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_1048577:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: lu12i.w $a1, 2048
-; LA32-NEXT: ori $a1, $a1, 8
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+8388616)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+8388616)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+8388616)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_1048577:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT: addu16i.d $a0, $a0, 128
-; LA64-NEXT: addi.d $a0, $a0, 8
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+8388616)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64+8388616)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64+8388616)
; LA64-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 1048577)
@@ -810,20 +740,16 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_268432896() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_268432896:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: lu12i.w $a1, 524283
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+2147463168)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+2147463168)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+2147463168)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_268432896:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT: lu12i.w $a1, 524283
-; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+2147463168)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64+2147463168)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64+2147463168)
; LA64-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 268432896)
@@ -832,22 +758,16 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_268432897() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_268432897:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: lu12i.w $a1, 524283
-; LA32-NEXT: ori $a1, $a1, 8
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+2147463176)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+2147463176)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+2147463176)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_268432897:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT: lu12i.w $a1, 524283
-; LA64-NEXT: ori $a1, $a1, 8
-; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+2147463176)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64+2147463176)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64+2147463176)
; LA64-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 268432897)
@@ -877,11 +797,9 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_248792680471040() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_248792680471040:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: lu12i.w $a1, 502733
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+2059194368)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+2059194368)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+2059194368)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_248792680471040:
@@ -900,12 +818,9 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_9380351707272() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_9380351707272:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: lu12i.w $a1, 279556
-; LA32-NEXT: ori $a1, $a1, 1088
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+1145062464)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+1145062464)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+1145062464)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_9380351707272:
@@ -945,12 +860,9 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_614749556925924693() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_614749556925924693:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: lu12i.w $a1, 209666
-; LA32-NEXT: ori $a1, $a1, 2728
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+858794664)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+858794664)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+858794664)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_614749556925924693:
>From 693401e1ca8caaf18b30c40e78f74fe0134470d9 Mon Sep 17 00:00:00 2001
From: ZhaoQi <zhaoqi01 at loongson.cn>
Date: Mon, 10 Feb 2025 16:58:40 +0800
Subject: [PATCH 6/8] Update LoongArchMergeBaseOffset.cpp
---
llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp | 3 ---
1 file changed, 3 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
index db6270f824c3c54..2aae498e1f2de2b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
@@ -722,11 +722,8 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
// optimized, it cannot be relaxed any more. So MO_RELAX flag should not be
// carried by them.
Hi20.getOperand(1).setOffset(NewOffset);
- Hi20.getOperand(1).setTargetFlags(
- LoongArchII::getDirectFlags(Hi20.getOperand(1)));
MachineOperand &ImmOp = Lo12.getOperand(2);
ImmOp.setOffset(NewOffset);
- ImmOp.setTargetFlags(LoongArchII::getDirectFlags(ImmOp));
if (Lo20 && Hi12) {
Lo20->getOperand(2).setOffset(NewOffset);
Hi12->getOperand(2).setOffset(NewOffset);
>From a32255c9de883811297fda83e794ae470e60d571 Mon Sep 17 00:00:00 2001
From: ZhaoQi <zhaoqi01 at loongson.cn>
Date: Mon, 10 Feb 2025 17:02:27 +0800
Subject: [PATCH 7/8] Update merge-base-offset-tlsle.ll
---
llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
index 79dee8f9ef37557..9ed9a865ce55d40 100644
--- a/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
+++ b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
@@ -880,4 +880,4 @@ entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 614749556925924693)
}
-declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
\ No newline at end of file
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
>From 9012ff03ee63bf4119cc589f648081bc604d807b Mon Sep 17 00:00:00 2001
From: ZhaoQi <zhaoqi01 at loongson.cn>
Date: Mon, 10 Feb 2025 17:04:28 +0800
Subject: [PATCH 8/8] Update merge-base-offset-tlsle.ll
More information about the llvm-commits
mailing list