[llvm-branch-commits] [llvm] [LoongArch] Merge base and offset for tls-le code sequence (PR #122999)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jan 14 22:37:11 PST 2025
https://github.com/zhaoqi5 updated https://github.com/llvm/llvm-project/pull/122999
>From e37c2933125a65e627949557fba7a606d41db716 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Tue, 14 Jan 2025 21:35:31 +0800
Subject: [PATCH] [LoongArch] Merge base and offset for tls-le code sequence
Adapt the merge base offset pass to optimize the tls-le
code sequence.
---
.../LoongArch/LoongArchMergeBaseOffset.cpp | 165 ++++++++-
.../LoongArch/machinelicm-address-pseudos.ll | 6 +-
.../LoongArch/merge-base-offset-tlsle.ll | 318 +++++++-----------
3 files changed, 266 insertions(+), 223 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
index 7f98f7718a538d..2aae498e1f2de2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
@@ -37,6 +37,8 @@ class LoongArchMergeBaseOffsetOpt : public MachineFunctionPass {
bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Lo12,
MachineInstr *&Lo20, MachineInstr *&Hi12,
MachineInstr *&Last);
+ bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Add,
+ MachineInstr *&Lo12);
bool detectAndFoldOffset(MachineInstr &Hi20, MachineInstr &Lo12,
MachineInstr *&Lo20, MachineInstr *&Hi12,
@@ -176,7 +178,80 @@ bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
return true;
}
-// Update the offset in Hi20, Lo12, Lo20 and Hi12 instructions.
+// Detect the pattern:
+//
+// (small/medium):
+// lu12i.w vreg1, %le_hi20_r(s)
+// add.w/d vreg2, vreg1, r2, %le_add_r(s)
+// addi.w/d vreg3, vreg2, %le_lo12_r(s)
+
+// The pattern is only accepted if:
+// 1) The first instruction has only one use, which is the PseudoAddTPRel.
+// The second instruction has only one use, which is the ADDI. The
+// second instruction's last operand is the tp register.
+// 2) The address operands have the appropriate type, reflecting the
+// lowering of a thread_local global address using the pattern.
+// 3) The offset value in the ThreadLocal Global Address is 0.
+bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
+ MachineInstr *&Add,
+ MachineInstr *&Lo12) {
+ if (Hi20.getOpcode() != LoongArch::LU12I_W)
+ return false;
+
+ auto isGlobalOrCPI = [](const MachineOperand &Op) {
+ return Op.isGlobal() || Op.isCPI();
+ };
+
+ const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
+ if (LoongArchII::getDirectFlags(Hi20Op1) != LoongArchII::MO_LE_HI_R ||
+ !isGlobalOrCPI(Hi20Op1) || Hi20Op1.getOffset() != 0)
+ return false;
+
+ Register HiDestReg = Hi20.getOperand(0).getReg();
+ if (!MRI->hasOneUse(HiDestReg))
+ return false;
+
+ Add = &*MRI->use_instr_begin(HiDestReg);
+ if ((ST->is64Bit() && Add->getOpcode() != LoongArch::PseudoAddTPRel_D) ||
+ (!ST->is64Bit() && Add->getOpcode() != LoongArch::PseudoAddTPRel_W))
+ return false;
+
+ if (Add->getOperand(2).getReg() != LoongArch::R2)
+ return false;
+
+ const MachineOperand &AddOp3 = Add->getOperand(3);
+ if (LoongArchII::getDirectFlags(AddOp3) != LoongArchII::MO_LE_ADD_R ||
+ !(isGlobalOrCPI(AddOp3) || AddOp3.isMCSymbol()) ||
+ AddOp3.getOffset() != 0)
+ return false;
+
+ Register AddDestReg = Add->getOperand(0).getReg();
+ if (!MRI->hasOneUse(AddDestReg))
+ return false;
+
+ Lo12 = &*MRI->use_instr_begin(AddDestReg);
+ if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
+ (!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
+ return false;
+
+ const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
+ if (LoongArchII::getDirectFlags(Lo12Op2) != LoongArchII::MO_LE_LO_R ||
+ !(isGlobalOrCPI(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
+ Lo12Op2.getOffset() != 0)
+ return false;
+
+ if (Hi20Op1.isGlobal()) {
+ LLVM_DEBUG(dbgs() << " Found lowered global address: "
+ << *Hi20Op1.getGlobal() << "\n");
+ } else if (Hi20Op1.isCPI()) {
+ LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << Hi20Op1.getIndex()
+ << "\n");
+ }
+
+ return true;
+}
+
+// Update the offset in Hi20, (Add), Lo12, (Lo20 and Hi12) instructions.
// Delete the tail instruction and update all the uses to use the
// output from Last.
void LoongArchMergeBaseOffsetOpt::foldOffset(
@@ -190,31 +265,49 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
Lo20->getOperand(2).setOffset(Offset);
Hi12->getOperand(2).setOffset(Offset);
}
+
+ // For tls-le, offset of the second PseudoAddTPRel instr should also be
+ // updated.
+ MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
+ if (Hi20.getOpcode() == LoongArch::LU12I_W)
+ Add->getOperand(3).setOffset(Offset);
+
// Delete the tail instruction.
MachineInstr *Def = Last ? Last : &Lo12;
MRI->constrainRegClass(Def->getOperand(0).getReg(),
MRI->getRegClass(Tail.getOperand(0).getReg()));
MRI->replaceRegWith(Tail.getOperand(0).getReg(), Def->getOperand(0).getReg());
Tail.eraseFromParent();
+
LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
- << " " << Hi20 << " " << Lo12;);
+ << " " << Hi20;);
+ if (Hi20.getOpcode() == LoongArch::LU12I_W) {
+ LLVM_DEBUG(dbgs() << " " << *Add;);
+ }
+ LLVM_DEBUG(dbgs() << " " << Lo12;);
if (Lo20 && Hi12) {
LLVM_DEBUG(dbgs() << " " << *Lo20 << " " << *Hi12;);
}
}
// Detect patterns for large offsets that are passed into an ADD instruction.
-// If the pattern is found, updates the offset in Hi20, Lo12, Lo20 and Hi12
-// instructions and deletes TailAdd and the instructions that produced the
-// offset.
+// If the pattern is found, updates the offset in Hi20, (Add), Lo12,
+// (Lo20 and Hi12) instructions and deletes TailAdd and the instructions that
+// produced the offset.
//
// (The instructions marked with "!" are not necessarily present)
//
// Base address lowering is of the form:
-// Hi20: pcalau12i vreg1, %pc_hi20(s)
-// +- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
-// | Lo20: lu32i.d vreg2, %pc64_lo20(s) !
-// +- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
+// 1) pcala:
+// Hi20: pcalau12i vreg1, %pc_hi20(s)
+// +--- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
+// | Lo20: lu32i.d vreg2, %pc64_lo20(s) !
+// +--- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
+// |
+// | 2) tls-le:
+// | Hi20: lu12i.w vreg1, %le_hi20_r(s)
+// | Add: add.w/d vreg1, vreg1, r2, %le_add_r(s)
+// +--- Lo12: addi.w/d vreg2, vreg1, %le_lo12_r(s)
// |
// | The large offset can be one of the forms:
// |
@@ -334,7 +427,8 @@ bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
// Look for arithmetic instructions we can get an offset from.
// We might be able to remove the arithmetic instructions by folding the
- // offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I).
+ // offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I) or
+ // LU12I_W+PseudoAddTPRel+ADDI.
if (!MRI->hasOneUse(DestReg))
return false;
@@ -454,6 +548,7 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
// If all the uses are memory ops with the same offset, we can transform:
//
// 1. (small/medium):
+ // 1.1. pcala
// pcalau12i vreg1, %pc_hi20(s)
// addi.d vreg2, vreg1, %pc_lo12(s)
// ld.w vreg3, 8(vreg2)
@@ -463,6 +558,18 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
// pcalau12i vreg1, %pc_hi20(s+8)
// ld.w vreg3, vreg1, %pc_lo12(s+8)(vreg1)
//
+ // 1.2. tls-le
+ // lu12i.w vreg1, %le_hi20_r(s)
+ // add.w/d vreg2, vreg1, r2, %le_add_r(s)
+ // addi.w/d vreg3, vreg2, %le_lo12_r(s)
+ // ld.w vreg4, 8(vreg3)
+ //
+ // =>
+ //
+ // lu12i.w vreg1, %le_hi20_r(s+8)
+ // add.w/d vreg2, vreg1, r2, %le_add_r(s+8)
+ // ld.w vreg4, vreg2, %le_lo12_r(s+8)(vreg2)
+ //
// 2. (large):
// pcalau12i vreg1, %pc_hi20(s)
// addi.d vreg2, $zero, %pc_lo12(s)
@@ -598,7 +705,8 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
return false;
// If optimized by this pass successfully, MO_RELAX bitmask target-flag should
- // be removed from the code sequence.
+ // be removed from the pcala code sequence. Code sequence of tls-le can still
+ // be relaxed after being optimized.
//
// For example:
// pcalau12i $a0, %pc_hi20(symbol)
@@ -614,15 +722,20 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
// optimized, it cannot be relaxed any more. So MO_RELAX flag should not be
// carried by them.
Hi20.getOperand(1).setOffset(NewOffset);
- Hi20.getOperand(1).setTargetFlags(
- LoongArchII::getDirectFlags(Hi20.getOperand(1)));
MachineOperand &ImmOp = Lo12.getOperand(2);
ImmOp.setOffset(NewOffset);
- ImmOp.setTargetFlags(LoongArchII::getDirectFlags(ImmOp));
if (Lo20 && Hi12) {
Lo20->getOperand(2).setOffset(NewOffset);
Hi12->getOperand(2).setOffset(NewOffset);
}
+ if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
+ Hi20.getOperand(1).setTargetFlags(
+ LoongArchII::getDirectFlags(Hi20.getOperand(1)));
+ ImmOp.setTargetFlags(LoongArchII::getDirectFlags(ImmOp));
+ } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
+ MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
+ Add->getOperand(3).setOffset(NewOffset);
+ }
// Update the immediate in the load/store instructions to add the offset.
const LoongArchInstrInfo &TII = *ST->getInstrInfo();
@@ -673,7 +786,14 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
return true;
}
- MRI->replaceRegWith(Lo12.getOperand(0).getReg(), Hi20.getOperand(0).getReg());
+ if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
+ MRI->replaceRegWith(Lo12.getOperand(0).getReg(),
+ Hi20.getOperand(0).getReg());
+ } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
+ MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
+ MRI->replaceRegWith(Lo12.getOperand(0).getReg(),
+ Add->getOperand(0).getReg());
+ }
Lo12.eraseFromParent();
return true;
}
@@ -693,8 +813,21 @@ bool LoongArchMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
MachineInstr *Lo20 = nullptr;
MachineInstr *Hi12 = nullptr;
MachineInstr *Last = nullptr;
- if (!detectFoldable(Hi20, Lo12, Lo20, Hi12, Last))
+ if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
+ // Detect foldable pcala code sequence in small/medium/large code model.
+ if (!detectFoldable(Hi20, Lo12, Lo20, Hi12, Last))
+ continue;
+ } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
+ MachineInstr *Add = nullptr;
+ // Detect foldable tls-le code sequence in small/medium code model.
+ if (!detectFoldable(Hi20, Add, Lo12))
+ continue;
+ } else {
continue;
+ }
+ // For tls-le, we do not pass the second PseudoAddTPRel instr in order to
+    // reuse the existing hooks and the last three parameters should always be
+ // nullptr.
MadeChange |= detectAndFoldOffset(Hi20, *Lo12, Lo20, Hi12, Last);
MadeChange |= foldIntoMemoryOps(Hi20, *Lo12, Lo20, Hi12, Last);
}
diff --git a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
index e0a93e3051bf88..92d079ab3a8d87 100644
--- a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
+++ b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
@@ -317,11 +317,10 @@ define void @test_la_tls_le(i32 signext %n) {
; LA32-NEXT: move $a1, $zero
; LA32-NEXT: lu12i.w $a2, %le_hi20_r(le)
; LA32-NEXT: add.w $a2, $a2, $tp, %le_add_r(le)
-; LA32-NEXT: addi.w $a2, $a2, %le_lo12_r(le)
; LA32-NEXT: .p2align 4, , 16
; LA32-NEXT: .LBB4_1: # %loop
; LA32-NEXT: # =>This Inner Loop Header: Depth=1
-; LA32-NEXT: ld.w $zero, $a2, 0
+; LA32-NEXT: ld.w $zero, $a2, %le_lo12_r(le)
; LA32-NEXT: addi.w $a1, $a1, 1
; LA32-NEXT: blt $a1, $a0, .LBB4_1
; LA32-NEXT: # %bb.2: # %ret
@@ -332,11 +331,10 @@ define void @test_la_tls_le(i32 signext %n) {
; LA64-NEXT: move $a1, $zero
; LA64-NEXT: lu12i.w $a2, %le_hi20_r(le)
; LA64-NEXT: add.d $a2, $a2, $tp, %le_add_r(le)
-; LA64-NEXT: addi.d $a2, $a2, %le_lo12_r(le)
; LA64-NEXT: .p2align 4, , 16
; LA64-NEXT: .LBB4_1: # %loop
; LA64-NEXT: # =>This Inner Loop Header: Depth=1
-; LA64-NEXT: ld.w $zero, $a2, 0
+; LA64-NEXT: ld.w $zero, $a2, %le_lo12_r(le)
; LA64-NEXT: addi.w $a1, $a1, 1
; LA64-NEXT: blt $a1, $a0, .LBB4_1
; LA64-NEXT: # %bb.2: # %ret
diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
index 7e995d224ce1d2..9ed9a865ce55d4 100644
--- a/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
+++ b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
@@ -11,16 +11,14 @@ define dso_local signext i8 @tlsle_load_s8() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
-; LA32-NEXT: ld.b $a0, $a0, 0
+; LA32-NEXT: ld.b $a0, $a0, %le_lo12_r(g_i8)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_s8:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
-; LA64-NEXT: ld.b $a0, $a0, 0
+; LA64-NEXT: ld.b $a0, $a0, %le_lo12_r(g_i8)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
@@ -33,16 +31,14 @@ define dso_local zeroext i8 @tlsle_load_u8() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
-; LA32-NEXT: ld.bu $a0, $a0, 0
+; LA32-NEXT: ld.bu $a0, $a0, %le_lo12_r(g_i8)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_u8:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
-; LA64-NEXT: ld.bu $a0, $a0, 0
+; LA64-NEXT: ld.bu $a0, $a0, %le_lo12_r(g_i8)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
@@ -55,18 +51,16 @@ define dso_local void @tlsle_store_i8() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
; LA32-NEXT: ori $a1, $zero, 1
-; LA32-NEXT: st.b $a1, $a0, 0
+; LA32-NEXT: st.b $a1, $a0, %le_lo12_r(g_i8)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_i8:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
; LA64-NEXT: ori $a1, $zero, 1
-; LA64-NEXT: st.b $a1, $a0, 0
+; LA64-NEXT: st.b $a1, $a0, %le_lo12_r(g_i8)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
@@ -81,16 +75,14 @@ define dso_local signext i16 @tlsle_load_s16() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i16)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i16)
-; LA32-NEXT: ld.h $a0, $a0, 0
+; LA32-NEXT: ld.h $a0, $a0, %le_lo12_r(g_i16)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_s16:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i16)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i16)
-; LA64-NEXT: ld.h $a0, $a0, 0
+; LA64-NEXT: ld.h $a0, $a0, %le_lo12_r(g_i16)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
@@ -103,16 +95,14 @@ define dso_local zeroext i16 @tlsle_load_u16() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i16)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i16)
-; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: ld.hu $a0, $a0, %le_lo12_r(g_i16)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_u16:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i16)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i16)
-; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: ld.hu $a0, $a0, %le_lo12_r(g_i16)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
@@ -125,18 +115,16 @@ define dso_local void @tlsle_store_i16() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i16)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i16)
; LA32-NEXT: ori $a1, $zero, 1
-; LA32-NEXT: st.h $a1, $a0, 0
+; LA32-NEXT: st.h $a1, $a0, %le_lo12_r(g_i16)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_i16:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i16)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i16)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i16)
; LA64-NEXT: ori $a1, $zero, 1
-; LA64-NEXT: st.h $a1, $a0, 0
+; LA64-NEXT: st.h $a1, $a0, %le_lo12_r(g_i16)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
@@ -151,16 +139,14 @@ define dso_local signext i32 @tlsle_load_s32() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32)
-; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, %le_lo12_r(g_i32)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_s32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32)
-; LA64-NEXT: ld.w $a0, $a0, 0
+; LA64-NEXT: ld.w $a0, $a0, %le_lo12_r(g_i32)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
@@ -173,16 +159,14 @@ define dso_local zeroext i32 @tlsle_load_u32() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32)
-; LA32-NEXT: ld.w $a0, $a0, 0
+; LA32-NEXT: ld.w $a0, $a0, %le_lo12_r(g_i32)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_u32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32)
-; LA64-NEXT: ld.wu $a0, $a0, 0
+; LA64-NEXT: ld.wu $a0, $a0, %le_lo12_r(g_i32)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
@@ -195,18 +179,16 @@ define dso_local void @tlsle_store_i32() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32)
; LA32-NEXT: ori $a1, $zero, 1
-; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: st.w $a1, $a0, %le_lo12_r(g_i32)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_i32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32)
; LA64-NEXT: ori $a1, $zero, 1
-; LA64-NEXT: st.w $a1, $a0, 0
+; LA64-NEXT: st.w $a1, $a0, %le_lo12_r(g_i32)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
@@ -230,8 +212,7 @@ define dso_local i64 @tlsle_load_i64() nounwind {
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i64)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i64)
-; LA64-NEXT: ld.d $a0, $a0, 0
+; LA64-NEXT: ld.d $a0, $a0, %le_lo12_r(g_i64)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i64)
@@ -254,9 +235,8 @@ define dso_local void @tlsle_store_i64() nounwind {
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i64)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i64)
; LA64-NEXT: ori $a1, $zero, 1
-; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: st.d $a1, $a0, %le_lo12_r(g_i64)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i64)
@@ -271,16 +251,14 @@ define dso_local float @tlsle_load_f32() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f32)
-; LA32-NEXT: fld.s $fa0, $a0, 0
+; LA32-NEXT: fld.s $fa0, $a0, %le_lo12_r(g_f32)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_f32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f32)
-; LA64-NEXT: fld.s $fa0, $a0, 0
+; LA64-NEXT: fld.s $fa0, $a0, %le_lo12_r(g_f32)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f32)
@@ -293,18 +271,16 @@ define dso_local void @tlsle_store_f32() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f32)
; LA32-NEXT: lu12i.w $a1, 260096
-; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: st.w $a1, $a0, %le_lo12_r(g_f32)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_f32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f32)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f32)
; LA64-NEXT: lu12i.w $a1, 260096
-; LA64-NEXT: st.w $a1, $a0, 0
+; LA64-NEXT: st.w $a1, $a0, %le_lo12_r(g_f32)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f32)
@@ -319,16 +295,14 @@ define dso_local double @tlsle_load_f64() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f64)
-; LA32-NEXT: fld.d $fa0, $a0, 0
+; LA32-NEXT: fld.d $fa0, $a0, %le_lo12_r(g_f64)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_f64:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f64)
-; LA64-NEXT: fld.d $fa0, $a0, 0
+; LA64-NEXT: fld.d $fa0, $a0, %le_lo12_r(g_f64)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f64)
@@ -341,18 +315,16 @@ define dso_local void @tlsle_store_f64() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_f64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_f64)
; LA32-NEXT: vldi $vr0, -912
-; LA32-NEXT: fst.d $fa0, $a0, 0
+; LA32-NEXT: fst.d $fa0, $a0, %le_lo12_r(g_f64)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_f64:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_f64)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_f64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_f64)
; LA64-NEXT: lu52i.d $a1, $zero, 1023
-; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: st.d $a1, $a0, %le_lo12_r(g_f64)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f64)
@@ -380,11 +352,10 @@ define dso_local void @tlsle_store_multi() nounwind {
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_m64)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_m64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_m64)
; LA64-NEXT: ori $a1, $zero, 1
-; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: st.d $a1, $a0, %le_lo12_r(g_m64)
; LA64-NEXT: ori $a1, $zero, 2
-; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: st.d $a1, $a0, %le_lo12_r(g_m64)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_m64)
@@ -400,18 +371,16 @@ define dso_local void @tlsle_store_sf32() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_sf32)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_sf32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_sf32)
-; LA32-NEXT: fld.s $fa0, $a0, 0
-; LA32-NEXT: fst.s $fa0, $a0, 0
+; LA32-NEXT: fld.s $fa0, $a0, %le_lo12_r(g_sf32)
+; LA32-NEXT: fst.s $fa0, $a0, %le_lo12_r(g_sf32)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_sf32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_sf32)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_sf32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_sf32)
-; LA64-NEXT: fld.s $fa0, $a0, 0
-; LA64-NEXT: fst.s $fa0, $a0, 0
+; LA64-NEXT: fld.s $fa0, $a0, %le_lo12_r(g_sf32)
+; LA64-NEXT: fst.s $fa0, $a0, %le_lo12_r(g_sf32)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_sf32)
@@ -427,18 +396,16 @@ define dso_local void @tlsle_store_sf64() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_sf64)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_sf64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_sf64)
-; LA32-NEXT: fld.d $fa0, $a0, 0
-; LA32-NEXT: fst.d $fa0, $a0, 0
+; LA32-NEXT: fld.d $fa0, $a0, %le_lo12_r(g_sf64)
+; LA32-NEXT: fst.d $fa0, $a0, %le_lo12_r(g_sf64)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_sf64:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_sf64)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_sf64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_sf64)
-; LA64-NEXT: fld.d $fa0, $a0, 0
-; LA64-NEXT: fst.d $fa0, $a0, 0
+; LA64-NEXT: fld.d $fa0, $a0, %le_lo12_r(g_sf64)
+; LA64-NEXT: fst.d $fa0, $a0, %le_lo12_r(g_sf64)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_sf64)
@@ -455,24 +422,20 @@ define dso_local void @tlsle_copy_i32x4() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_src)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x4_src)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x4_src)
-; LA32-NEXT: vld $vr0, $a0, 0
+; LA32-NEXT: vld $vr0, $a0, %le_lo12_r(g_i32x4_src)
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_dst)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x4_dst)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x4_dst)
-; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: vst $vr0, $a0, %le_lo12_r(g_i32x4_dst)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_copy_i32x4:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_src)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x4_src)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x4_src)
-; LA64-NEXT: vld $vr0, $a0, 0
+; LA64-NEXT: vld $vr0, $a0, %le_lo12_r(g_i32x4_src)
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x4_dst)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x4_dst)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x4_dst)
-; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: vst $vr0, $a0, %le_lo12_r(g_i32x4_dst)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x4_src)
@@ -490,24 +453,20 @@ define dso_local void @tlsle_copy_i32x8() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_src)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x8_src)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x8_src)
-; LA32-NEXT: xvld $xr0, $a0, 0
+; LA32-NEXT: xvld $xr0, $a0, %le_lo12_r(g_i32x8_src)
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_dst)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i32x8_dst)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i32x8_dst)
-; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: xvst $xr0, $a0, %le_lo12_r(g_i32x8_dst)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_copy_i32x8:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_src)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x8_src)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x8_src)
-; LA64-NEXT: xvld $xr0, $a0, 0
+; LA64-NEXT: xvld $xr0, $a0, %le_lo12_r(g_i32x8_src)
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i32x8_dst)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i32x8_dst)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i32x8_dst)
-; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: xvst $xr0, $a0, %le_lo12_r(g_i32x8_dst)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x8_src)
@@ -524,24 +483,20 @@ define dso_local void @tlsle_copy_i8_to_i8x16() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
-; LA32-NEXT: vldrepl.b $vr0, $a0, 0
+; LA32-NEXT: vldrepl.b $vr0, $a0, %le_lo12_r(g_i8)
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x16)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8x16)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8x16)
-; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: vst $vr0, $a0, %le_lo12_r(g_i8x16)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_copy_i8_to_i8x16:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
-; LA64-NEXT: vldrepl.b $vr0, $a0, 0
+; LA64-NEXT: vldrepl.b $vr0, $a0, %le_lo12_r(g_i8)
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x16)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8x16)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8x16)
-; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: vst $vr0, $a0, %le_lo12_r(g_i8x16)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
@@ -558,24 +513,20 @@ define dso_local void @tlsle_copy_i8_to_i8x32() nounwind {
; LA32: # %bb.0: # %entry
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8)
-; LA32-NEXT: xvldrepl.b $xr0, $a0, 0
+; LA32-NEXT: xvldrepl.b $xr0, $a0, %le_lo12_r(g_i8)
; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x32)
; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_i8x32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_i8x32)
-; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: xvst $xr0, $a0, %le_lo12_r(g_i8x32)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_copy_i8_to_i8x32:
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8)
-; LA64-NEXT: xvldrepl.b $xr0, $a0, 0
+; LA64-NEXT: xvldrepl.b $xr0, $a0, %le_lo12_r(g_i8)
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_i8x32)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_i8x32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_i8x32)
-; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: xvst $xr0, $a0, %le_lo12_r(g_i8x32)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
@@ -606,10 +557,9 @@ define dso_local void @tlsle_rmw() nounwind {
; LA64: # %bb.0: # %entry
; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_rmw)
; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_rmw)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_rmw)
-; LA64-NEXT: ld.d $a1, $a0, 0
+; LA64-NEXT: ld.d $a1, $a0, %le_lo12_r(g_rmw)
; LA64-NEXT: addi.d $a1, $a1, 1
-; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: st.d $a1, $a0, %le_lo12_r(g_rmw)
; LA64-NEXT: ret
entry:
%0 = call ptr @llvm.threadlocal.address.p0(ptr @g_rmw)
@@ -624,22 +574,18 @@ entry:
define dso_local void @tlsle_store_a32() nounwind {
; LA32-LABEL: tlsle_store_a32:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a32)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a32)
-; LA32-NEXT: lu12i.w $a1, 1
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a32+4096)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a32+4096)
; LA32-NEXT: ori $a1, $zero, 1
-; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: st.w $a1, $a0, %le_lo12_r(g_a32+4096)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_store_a32:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a32)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a32)
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a32+4096)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a32+4096)
; LA64-NEXT: ori $a1, $zero, 1
-; LA64-NEXT: stptr.w $a1, $a0, 4096
+; LA64-NEXT: st.w $a1, $a0, %le_lo12_r(g_a32+4096)
; LA64-NEXT: ret
entry:
store i32 1, ptr getelementptr inbounds ([1 x i32], ptr @g_a32, i32 1024), align 4
@@ -681,29 +627,27 @@ entry:
define dso_local void @tlsle_control_flow_with_mem_access() nounwind {
; LA32-LABEL: tlsle_control_flow_with_mem_access:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a32)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a32)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a32)
-; LA32-NEXT: ld.w $a1, $a0, 4
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a32+4)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a32+4)
+; LA32-NEXT: ld.w $a1, $a0, %le_lo12_r(g_a32+4)
; LA32-NEXT: ori $a2, $zero, 1
; LA32-NEXT: blt $a1, $a2, .LBB25_2
; LA32-NEXT: # %bb.1: # %if.then
; LA32-NEXT: ori $a1, $zero, 10
-; LA32-NEXT: st.w $a1, $a0, 4
+; LA32-NEXT: st.w $a1, $a0, %le_lo12_r(g_a32+4)
; LA32-NEXT: .LBB25_2: # %if.end
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_control_flow_with_mem_access:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a32)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a32)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a32)
-; LA64-NEXT: ld.w $a1, $a0, 4
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a32+4)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a32+4)
+; LA64-NEXT: ld.w $a1, $a0, %le_lo12_r(g_a32+4)
; LA64-NEXT: ori $a2, $zero, 1
; LA64-NEXT: blt $a1, $a2, .LBB25_2
; LA64-NEXT: # %bb.1: # %if.then
; LA64-NEXT: ori $a1, $zero, 10
-; LA64-NEXT: st.w $a1, $a0, 4
+; LA64-NEXT: st.w $a1, $a0, %le_lo12_r(g_a32+4)
; LA64-NEXT: .LBB25_2: # %if.end
; LA64-NEXT: ret
entry:
@@ -724,18 +668,16 @@ if.end:
define dso_local ptr @tlsle_load_addr_offset_1() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_1:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, 8
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+8)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+8)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+8)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_1:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, 8
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+8)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64+8)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64+8)
; LA64-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 1)
@@ -744,20 +686,16 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_257() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_257:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, 2047
-; LA32-NEXT: addi.w $a0, $a0, 9
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+2056)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+2056)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+2056)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_257:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, 2047
-; LA64-NEXT: addi.d $a0, $a0, 9
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+2056)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64+2056)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64+2056)
; LA64-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 257)
@@ -766,19 +704,16 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_1048576() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_1048576:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: lu12i.w $a1, 2048
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+8388608)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+8388608)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+8388608)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_1048576:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT: addu16i.d $a0, $a0, 128
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+8388608)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64+8388608)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64+8388608)
; LA64-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 1048576)
@@ -787,21 +722,16 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_1048577() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_1048577:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: lu12i.w $a1, 2048
-; LA32-NEXT: ori $a1, $a1, 8
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+8388616)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+8388616)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+8388616)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_1048577:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT: addu16i.d $a0, $a0, 128
-; LA64-NEXT: addi.d $a0, $a0, 8
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+8388616)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64+8388616)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64+8388616)
; LA64-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 1048577)
@@ -810,20 +740,16 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_268432896() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_268432896:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: lu12i.w $a1, 524283
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+2147463168)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+2147463168)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+2147463168)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_268432896:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT: lu12i.w $a1, 524283
-; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+2147463168)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64+2147463168)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64+2147463168)
; LA64-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 268432896)
@@ -832,22 +758,16 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_268432897() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_268432897:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: lu12i.w $a1, 524283
-; LA32-NEXT: ori $a1, $a1, 8
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+2147463176)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+2147463176)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+2147463176)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_268432897:
; LA64: # %bb.0: # %entry
-; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT: lu12i.w $a1, 524283
-; LA64-NEXT: ori $a1, $a1, 8
-; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+2147463176)
+; LA64-NEXT: add.d $a0, $a0, $tp, %le_add_r(g_a64+2147463176)
+; LA64-NEXT: addi.d $a0, $a0, %le_lo12_r(g_a64+2147463176)
; LA64-NEXT: ret
entry:
ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 268432897)
@@ -877,11 +797,9 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_248792680471040() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_248792680471040:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: lu12i.w $a1, 502733
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+2059194368)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+2059194368)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+2059194368)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_248792680471040:
@@ -900,12 +818,9 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_9380351707272() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_9380351707272:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: lu12i.w $a1, 279556
-; LA32-NEXT: ori $a1, $a1, 1088
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+1145062464)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+1145062464)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+1145062464)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_9380351707272:
@@ -945,12 +860,9 @@ entry:
define dso_local ptr @tlsle_load_addr_offset_614749556925924693() nounwind {
; LA32-LABEL: tlsle_load_addr_offset_614749556925924693:
; LA32: # %bb.0: # %entry
-; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT: lu12i.w $a1, 209666
-; LA32-NEXT: ori $a1, $a1, 2728
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a0, %le_hi20_r(g_a64+858794664)
+; LA32-NEXT: add.w $a0, $a0, $tp, %le_add_r(g_a64+858794664)
+; LA32-NEXT: addi.w $a0, $a0, %le_lo12_r(g_a64+858794664)
; LA32-NEXT: ret
;
; LA64-LABEL: tlsle_load_addr_offset_614749556925924693:
More information about the llvm-branch-commits
mailing list