[llvm] 5526786 - [VE] TLS codegen
Simon Moll via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 18 07:09:40 PST 2020
Author: Kazushi (Jam) Marukawa
Date: 2020-02-18T16:09:12+01:00
New Revision: 5526786a56bd5fb187a8c6f601268e58e351b3c8
URL: https://github.com/llvm/llvm-project/commit/5526786a56bd5fb187a8c6f601268e58e351b3c8
DIFF: https://github.com/llvm/llvm-project/commit/5526786a56bd5fb187a8c6f601268e58e351b3c8.diff
LOG: [VE] TLS codegen
Summary:
Codegen and tests for thread-local storage.
This implements only the general dynamic model due to limitations in nld 2.26.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D74718
Added:
llvm/test/CodeGen/VE/tls.ll
Modified:
llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h
llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp
llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h
llvm/lib/Target/VE/VEAsmPrinter.cpp
llvm/lib/Target/VE/VEISelLowering.cpp
llvm/lib/Target/VE/VEISelLowering.h
llvm/lib/Target/VE/VEInstrInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h
index 2d796699a3cf..46a340f95d97 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h
@@ -42,6 +42,12 @@ enum Fixups {
fixup_ve_plt_hi32,
fixup_ve_plt_lo32,
+ /// fixups for Thread Local Storage
+ fixup_ve_tls_gd_hi32,
+ fixup_ve_tls_gd_lo32,
+ fixup_ve_tpoff_hi32,
+ fixup_ve_tpoff_lo32,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp
index abb490eb1274..e601a6068536 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp
@@ -54,6 +54,10 @@ bool VEMCExpr::printVariantKind(raw_ostream &OS, VariantKind Kind) {
case VK_VE_GOTOFF_LO32:
case VK_VE_PLT_HI32:
case VK_VE_PLT_LO32:
+ case VK_VE_TLS_GD_HI32:
+ case VK_VE_TLS_GD_LO32:
+ case VK_VE_TPOFF_HI32:
+ case VK_VE_TPOFF_LO32:
return false; // OS << "@<text>("; break;
}
return true;
@@ -90,9 +94,21 @@ void VEMCExpr::printVariantKindSuffix(raw_ostream &OS, VariantKind Kind) {
case VK_VE_PLT_HI32:
OS << "@plt_hi";
break;
+ case VK_VE_TLS_GD_HI32:
+ OS << "@tls_gd_hi";
+ break;
+ case VK_VE_TLS_GD_LO32:
+ OS << "@tls_gd_lo";
+ break;
case VK_VE_PLT_LO32:
OS << "@plt_lo";
break;
+ case VK_VE_TPOFF_HI32:
+ OS << "@tpoff_hi";
+ break;
+ case VK_VE_TPOFF_LO32:
+ OS << "@tpoff_lo";
+ break;
}
}
@@ -108,6 +124,10 @@ VEMCExpr::VariantKind VEMCExpr::parseVariantKind(StringRef name) {
.Case("gotoff_lo", VK_VE_GOTOFF_LO32)
.Case("plt_hi", VK_VE_PLT_HI32)
.Case("plt_lo", VK_VE_PLT_LO32)
+ .Case("tls_gd_hi", VK_VE_TLS_GD_HI32)
+ .Case("tls_gd_lo", VK_VE_TLS_GD_LO32)
+ .Case("tpoff_hi", VK_VE_TPOFF_HI32)
+ .Case("tpoff_lo", VK_VE_TPOFF_LO32)
.Default(VK_VE_None);
}
@@ -135,6 +155,10 @@ VE::Fixups VEMCExpr::getFixupKind(VEMCExpr::VariantKind Kind) {
return VE::fixup_ve_plt_hi32;
case VK_VE_PLT_LO32:
return VE::fixup_ve_plt_lo32;
+ case VK_VE_TLS_GD_HI32:
+ return VE::fixup_ve_tls_gd_hi32;
+ case VK_VE_TLS_GD_LO32:
+ return VE::fixup_ve_tls_gd_lo32;
}
}
@@ -144,10 +168,38 @@ bool VEMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup);
}
+static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
+ switch (Expr->getKind()) { // Recursively walks Expr and sets the ELF type of every symbol it references to STT_TLS.
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expr!");
+ break;
+ // Constants reference no symbol, so there is nothing to retype.
+ case MCExpr::Constant:
+ break;
+ // Binary expressions: visit both operands; Asm is only passed along to the recursive calls.
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
+ fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
+ fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
+ break;
+ }
+ // Symbol reference: the symbol is cast to MCSymbolELF and its type set to STT_TLS.
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
+ cast<MCSymbolELF>(SymRef.getSymbol()).setType(ELF::STT_TLS);
+ break;
+ }
+ // Unary expressions: recurse into the single sub-expression.
+ case MCExpr::Unary:
+ fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
+ break;
+ }
+}
+
void VEMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
Streamer.visitUsedExpr(*getSubExpr());
}
void VEMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
- llvm_unreachable("TODO implement");
+ fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
}
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h
index 8e884443e41f..e2f6f2cb570a 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h
@@ -34,6 +34,10 @@ class VEMCExpr : public MCTargetExpr {
VK_VE_GOTOFF_LO32,
VK_VE_PLT_HI32,
VK_VE_PLT_LO32,
+ VK_VE_TLS_GD_HI32,
+ VK_VE_TLS_GD_LO32,
+ VK_VE_TPOFF_HI32,
+ VK_VE_TPOFF_LO32,
};
private:
diff --git a/llvm/lib/Target/VE/VEAsmPrinter.cpp b/llvm/lib/Target/VE/VEAsmPrinter.cpp
index 6e6acffcb402..99bbe441608d 100644
--- a/llvm/lib/Target/VE/VEAsmPrinter.cpp
+++ b/llvm/lib/Target/VE/VEAsmPrinter.cpp
@@ -51,6 +51,8 @@ class VEAsmPrinter : public AsmPrinter {
const MCSubtargetInfo &STI);
void lowerGETFunPLTAndEmitMCInsts(const MachineInstr *MI,
const MCSubtargetInfo &STI);
+ void lowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI,
+ const MCSubtargetInfo &STI);
void emitInstruction(const MachineInstr *MI) override;
@@ -82,6 +84,15 @@ static void emitSIC(MCStreamer &OutStreamer, MCOperand &RD,
OutStreamer.emitInstruction(SICInst, STI);
}
+static void emitBSIC(MCStreamer &OutStreamer, MCOperand &R1, MCOperand &R2,
+ const MCSubtargetInfo &STI) {
+ MCInst BSICInst; // Assemble a VE::BSIC instruction from R1 and R2 and hand it to the streamer.
+ BSICInst.setOpcode(VE::BSIC);
+ BSICInst.addOperand(R1); // operand 0; the GETTLSADDR expansion passes the SX10 (LR) register here
+ BSICInst.addOperand(R2); // operand 1; the GETTLSADDR expansion passes SX12 here
+ OutStreamer.emitInstruction(BSICInst, STI);
+}
+
static void emitLEAzzi(MCStreamer &OutStreamer, MCOperand &Imm, MCOperand &RD,
const MCSubtargetInfo &STI) {
MCInst LEAInst;
@@ -241,6 +252,63 @@ void VEAsmPrinter::lowerGETFunPLTAndEmitMCInsts(const MachineInstr *MI,
emitLEASLrri(*OutStreamer, MCRegOP, RegPLT, hiImm, MCRegOP, STI);
}
+void VEAsmPrinter::lowerGETTLSAddrAndEmitMCInsts(const MachineInstr *MI,
+ const MCSubtargetInfo &STI) { // Expands a GETTLSADDR pseudo into the MC instruction sequence listed below.
+ const MachineOperand &Addr = MI->getOperand(0);
+ MCSymbol *AddrSym = nullptr;
+ // Resolve operand 0 to an MCSymbol; only external symbols and global addresses are accepted.
+ switch (Addr.getType()) {
+ default:
+ llvm_unreachable("<unknown operand type>");
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ report_fatal_error("MBB is not supported yet");
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ report_fatal_error("ConstantPool is not supported yet");
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ AddrSym = GetExternalSymbolSymbol(Addr.getSymbolName());
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ AddrSym = getSymbol(Addr.getGlobal());
+ break;
+ }
+ // Fixed registers and the callee symbol used by the emitted sequence.
+ MCOperand RegLR = MCOperand::createReg(VE::SX10); // LR
+ MCOperand RegS0 = MCOperand::createReg(VE::SX0); // S0
+ MCOperand RegS12 = MCOperand::createReg(VE::SX12); // S12
+ MCSymbol *GetTLSLabel = OutContext.getOrCreateSymbol(Twine("__tls_get_addr"));
+ // Instruction sequence emitted below, one line per emit* call:
+ // lea %s0, sym@tls_gd_lo(-24)
+ // and %s0, %s0, (32)0
+ // sic %lr
+ // lea.sl %s0, sym@tls_gd_hi(%s0, %lr)
+ // lea %s12, __tls_get_addr@plt_lo(8)
+ // and %s12, %s12, (32)0
+ // lea.sl %s12, __tls_get_addr@plt_hi(%s12, %lr)
+ // bsic %lr, (, %s12)
+ MCOperand cim24 = MCOperand::createImm(-24);
+ MCOperand loImm =
+ createGOTRelExprOp(VEMCExpr::VK_VE_TLS_GD_LO32, AddrSym, OutContext);
+ emitLEAzii(*OutStreamer, cim24, loImm, RegS0, STI); // lea %s0, sym@tls_gd_lo(-24)
+ MCOperand ci32 = MCOperand::createImm(32); // the "(32)0" operand shared by both `and` instructions
+ emitANDrm0(*OutStreamer, RegS0, ci32, RegS0, STI); // and %s0, %s0, (32)0
+ emitSIC(*OutStreamer, RegLR, STI); // sic %lr
+ MCOperand hiImm =
+ createGOTRelExprOp(VEMCExpr::VK_VE_TLS_GD_HI32, AddrSym, OutContext);
+ emitLEASLrri(*OutStreamer, RegS0, RegLR, hiImm, RegS0, STI); // lea.sl %s0, sym@tls_gd_hi(...)
+ MCOperand ci8 = MCOperand::createImm(8);
+ MCOperand loImm2 =
+ createGOTRelExprOp(VEMCExpr::VK_VE_PLT_LO32, GetTLSLabel, OutContext);
+ emitLEAzii(*OutStreamer, ci8, loImm2, RegS12, STI); // lea %s12, __tls_get_addr@plt_lo(8)
+ emitANDrm0(*OutStreamer, RegS12, ci32, RegS12, STI); // and %s12, %s12, (32)0
+ MCOperand hiImm2 =
+ createGOTRelExprOp(VEMCExpr::VK_VE_PLT_HI32, GetTLSLabel, OutContext);
+ emitLEASLrri(*OutStreamer, RegS12, RegLR, hiImm2, RegS12, STI); // lea.sl %s12, __tls_get_addr@plt_hi(...)
+ emitBSIC(*OutStreamer, RegLR, RegS12, STI); // bsic %lr, (, %s12)
+}
+
void VEAsmPrinter::emitInstruction(const MachineInstr *MI) {
switch (MI->getOpcode()) {
@@ -255,6 +323,9 @@ void VEAsmPrinter::emitInstruction(const MachineInstr *MI) {
case VE::GETFUNPLT:
lowerGETFunPLTAndEmitMCInsts(MI, getSubtargetInfo());
return;
+ case VE::GETTLSADDR:
+ lowerGETTLSAddrAndEmitMCInsts(MI, getSubtargetInfo());
+ return;
}
MachineBasicBlock::const_instr_iterator I = MI->getIterator();
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 70dc35989ff2..3646d5d59ccc 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -572,6 +572,7 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
setOperationAction(ISD::BlockAddress, PtrVT, Custom);
setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
/// VAARG handling {
setOperationAction(ISD::VASTART, MVT::Other, Custom);
@@ -640,6 +641,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
TARGET_NODE_CASE(Lo)
TARGET_NODE_CASE(Hi)
TARGET_NODE_CASE(GETFUNPLT)
+ TARGET_NODE_CASE(GETTLSADDR)
TARGET_NODE_CASE(CALL)
TARGET_NODE_CASE(RET_FLAG)
TARGET_NODE_CASE(GLOBAL_BASE_REG)
@@ -748,6 +750,56 @@ SDValue VETargetLowering::LowerBlockAddress(SDValue Op,
return makeAddress(Op, DAG);
}
+SDValue
+VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ // Wraps a VEISD::GETTLSADDR node in a glued call sequence and returns the value copied out of SX0.
+ // Generate the following code:
+ // t1: ch,glue = callseq_start t0, 64, 0
+ // t2: ch,glue = VEISD::GETTLSADDR t1, label, regmask, t1:1
+ // t3: ch,glue = callseq_end t2, 64, 0, t2:1
+ // t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
+ SDValue Label = withTargetFlags(Op, 0, DAG);
+ EVT PtrVT = Op.getValueType();
+
+ // Lowering the machine isd will make sure everything is in the right
+ // location.
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
+ DAG.getMachineFunction(), CallingConv::C);
+ Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl);
+ SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
+ Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args);
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true),
+ DAG.getIntPtrConstant(0, dl, true),
+ Chain.getValue(1), dl);
+ Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1));
+
+ // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setHasCalls(true);
+
+ // Also generate code to prepare a GOT register if it is PIC.
+ if (isPositionIndependent()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
+ }
+
+ return Chain;
+}
+
+SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Custom lowering for ISD::GlobalTLSAddress. nld (2.26) doesn't accept the
+ // local exec model code described in VE-tls_v1.1.pdf (*1) as its input, so
+ // we always generate the general dynamic model code sequence instead.
+ //
+ // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
+ return LowerToTLSGeneralDynamicModel(Op, DAG);
+}
+
SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
@@ -816,6 +868,8 @@ SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ return LowerGlobalTLSAddress(Op, DAG);
case ISD::VASTART:
return LowerVASTART(Op, DAG);
case ISD::VAARG:
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index 0d3796ebb60e..3d27a319ba28 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -28,6 +28,8 @@ enum NodeType : unsigned {
Lo, // Hi/Lo operations, typically on a global address.
GETFUNPLT, // load function address through %plt instruction
+ GETTLSADDR, // load address for TLS access
+
CALL, // A call instruction.
RET_FLAG, // Return with a flag operand.
GLOBAL_BASE_REG, // Global base reg for PIC.
@@ -77,6 +79,8 @@ class VETargetLowering : public TargetLowering {
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const;
/// } Custom Lower
SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 8fb89c83c5ff..4000bdac9f36 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -216,6 +216,12 @@ def getGOT : Operand<iPTR>;
// GETFUNPLT for PIC
def GetFunPLT : SDNode<"VEISD::GETFUNPLT", SDTIntUnaryOp>;
+// GETTLSADDR for TLS
+def GetTLSAddr : SDNode<"VEISD::GETTLSADDR", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+
//===----------------------------------------------------------------------===//
// VE Flag Conditions
@@ -1027,6 +1033,11 @@ def RET : CF<
"b.l (,%lr)",
[(retflag)]>;
+// Branch and Save IC
+// Defined without a selection pattern; VEAsmPrinter emits it directly when expanding GETTLSADDR.
+let cx = 0, cy = 0, cz = 1, hasSideEffects = 0 /* , Uses = [IC] */ in
+def BSIC : RM<0x08, (outs), (ins I64:$sx, I64:$sz), "bsic $sx, (, ${sz})">;
+
// Branch instruction
let cx = 0, cx2 = 0, bpf = 0 /* NONE */ in
defm BCRL : BCRm<"br${cf}.l", "br.l", 0x18, I64, i64, simm7Op64, uimm6Op64>;
@@ -1197,6 +1208,13 @@ def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)),
(LEASLrzi (ANDrm0 (LEAzzi tglobaladdr:$in2), 32),
(tglobaladdr:$in1))>;
+// GlobalTLS address calculation and its optimization
+def : Pat<(VEhi tglobaltlsaddr:$in), (LEASLzzi tglobaltlsaddr:$in)>;
+def : Pat<(VElo tglobaltlsaddr:$in), (ANDrm0 (LEAzzi tglobaltlsaddr:$in), 32)>;
+def : Pat<(add (VEhi tglobaltlsaddr:$in1), (VElo tglobaltlsaddr:$in2)),
+ (LEASLrzi (ANDrm0 (LEAzzi tglobaltlsaddr:$in2), 32),
+ (tglobaltlsaddr:$in1))>;
+
// Address calculation and its optimization
def : Pat<(VEhi texternalsym:$in), (LEASLzzi texternalsym:$in)>;
def : Pat<(VElo texternalsym:$in), (ANDrm0 (LEAzzi texternalsym:$in), 32)>;
@@ -1247,6 +1265,14 @@ def : Pat<(GetFunPLT tglobaladdr:$dst),
def : Pat<(GetFunPLT texternalsym:$dst),
(GETFUNPLT texternalsym:$dst)>;
+// GETTLSADDR for TLS. Pseudo expanded by VEAsmPrinter; Defs names the registers that expansion writes.
+let Defs = [SX0, SX10, SX12], hasSideEffects = 0 in
+def GETTLSADDR : Pseudo<(outs), (ins i64imm:$addr),
+ "# GETTLSADDR $addr",
+ [(GetTLSAddr tglobaltlsaddr:$addr)] >;
+// NOTE(review): the Pat below matches the same node as the pattern attached to the Pseudo above; it looks redundant -- confirm.
+def : Pat<(GetTLSAddr tglobaltlsaddr:$dst),
+ (GETTLSADDR tglobaltlsaddr:$dst)>;
let Defs = [SX11], Uses = [SX11], hasSideEffects = 0 in {
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt, i64imm:$amt2),
diff --git a/llvm/test/CodeGen/VE/tls.ll b/llvm/test/CodeGen/VE/tls.ll
new file mode 100644
index 000000000000..9b1672295eef
--- /dev/null
+++ b/llvm/test/CodeGen/VE/tls.ll
@@ -0,0 +1,381 @@
+; FIXME: even under non-pic mode, llvm needs to generate pic code since nld
+; doesn't work with non-pic code. Therefore, we test pic code for
+; both cases here.
+; llc -mtriple ve < %s | FileCheck %s -check-prefix=LOCAL
+; RUN: llc -mtriple ve < %s | FileCheck %s -check-prefix=GENDYN
+; RUN: llc -mtriple ve -relocation-model=pic < %s | FileCheck %s -check-prefix=GENDYNPIC
+
+@x = external thread_local global i32, align 4 ; thread-local variable declared external to this module
+@y = internal thread_local global i32 0, align 4 ; thread-local variable with internal linkage, zero-initialised
+
+; Function Attrs: norecurse nounwind readnone
+define nonnull i32* @get_global() {
+; GENDYN-LABEL: get_global:
+; GENDYN: # %bb.0: # %entry
+; GENDYN-NEXT: st %s9, (,%s11)
+; GENDYN-NEXT: st %s10, 8(,%s11)
+; GENDYN-NEXT: st %s15, 24(,%s11)
+; GENDYN-NEXT: st %s16, 32(,%s11)
+; GENDYN-NEXT: or %s9, 0, %s11
+; GENDYN-NEXT: lea %s13, -240
+; GENDYN-NEXT: and %s13, %s13, (32)0
+; GENDYN-NEXT: lea.sl %s11, -1(%s11, %s13)
+; GENDYN-NEXT: brge.l %s11, %s8, .LBB0_2
+; GENDYN-NEXT: # %bb.1: # %entry
+; GENDYN-NEXT: ld %s61, 24(,%s14)
+; GENDYN-NEXT: or %s62, 0, %s0
+; GENDYN-NEXT: lea %s63, 315
+; GENDYN-NEXT: shm.l %s63, (%s61)
+; GENDYN-NEXT: shm.l %s8, 8(%s61)
+; GENDYN-NEXT: shm.l %s11, 16(%s61)
+; GENDYN-NEXT: monc
+; GENDYN-NEXT: or %s0, 0, %s62
+; GENDYN-NEXT: .LBB0_2: # %entry
+; GENDYN-NEXT: lea %s0, x@tls_gd_lo(-24)
+; GENDYN-NEXT: and %s0, %s0, (32)0
+; GENDYN-NEXT: sic %s10
+; GENDYN-NEXT: lea.sl %s0, x@tls_gd_hi(%s10, %s0)
+; GENDYN-NEXT: lea %s12, __tls_get_addr@plt_lo(8)
+; GENDYN-NEXT: and %s12, %s12, (32)0
+; GENDYN-NEXT: lea.sl %s12, __tls_get_addr@plt_hi(%s10, %s12)
+; GENDYN-NEXT: bsic %s10, (, %s12)
+; GENDYN-NEXT: or %s11, 0, %s9
+; GENDYN-NEXT: ld %s16, 32(,%s11)
+; GENDYN-NEXT: ld %s15, 24(,%s11)
+; GENDYN-NEXT: ld %s10, 8(,%s11)
+; GENDYN-NEXT: ld %s9, (,%s11)
+; GENDYN-NEXT: b.l (,%lr)
+;
+; GENDYNPIC-LABEL: get_global:
+; GENDYNPIC: # %bb.0: # %entry
+; GENDYNPIC-NEXT: st %s9, (,%s11)
+; GENDYNPIC-NEXT: st %s10, 8(,%s11)
+; GENDYNPIC-NEXT: st %s15, 24(,%s11)
+; GENDYNPIC-NEXT: st %s16, 32(,%s11)
+; GENDYNPIC-NEXT: or %s9, 0, %s11
+; GENDYNPIC-NEXT: lea %s13, -240
+; GENDYNPIC-NEXT: and %s13, %s13, (32)0
+; GENDYNPIC-NEXT: lea.sl %s11, -1(%s11, %s13)
+; GENDYNPIC-NEXT: brge.l %s11, %s8, .LBB0_2
+; GENDYNPIC-NEXT: # %bb.1: # %entry
+; GENDYNPIC-NEXT: ld %s61, 24(,%s14)
+; GENDYNPIC-NEXT: or %s62, 0, %s0
+; GENDYNPIC-NEXT: lea %s63, 315
+; GENDYNPIC-NEXT: shm.l %s63, (%s61)
+; GENDYNPIC-NEXT: shm.l %s8, 8(%s61)
+; GENDYNPIC-NEXT: shm.l %s11, 16(%s61)
+; GENDYNPIC-NEXT: monc
+; GENDYNPIC-NEXT: or %s0, 0, %s62
+; GENDYNPIC-NEXT: .LBB0_2: # %entry
+; GENDYNPIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24)
+; GENDYNPIC-NEXT: and %s15, %s15, (32)0
+; GENDYNPIC-NEXT: sic %s16
+; GENDYNPIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
+; GENDYNPIC-NEXT: lea %s0, x@tls_gd_lo(-24)
+; GENDYNPIC-NEXT: and %s0, %s0, (32)0
+; GENDYNPIC-NEXT: sic %s10
+; GENDYNPIC-NEXT: lea.sl %s0, x@tls_gd_hi(%s10, %s0)
+; GENDYNPIC-NEXT: lea %s12, __tls_get_addr@plt_lo(8)
+; GENDYNPIC-NEXT: and %s12, %s12, (32)0
+; GENDYNPIC-NEXT: lea.sl %s12, __tls_get_addr@plt_hi(%s10, %s12)
+; GENDYNPIC-NEXT: bsic %s10, (, %s12)
+; GENDYNPIC-NEXT: or %s11, 0, %s9
+; GENDYNPIC-NEXT: ld %s16, 32(,%s11)
+; GENDYNPIC-NEXT: ld %s15, 24(,%s11)
+; GENDYNPIC-NEXT: ld %s10, 8(,%s11)
+; GENDYNPIC-NEXT: ld %s9, (,%s11)
+; GENDYNPIC-NEXT: b.l (,%lr)
+; LOCAL-LABEL: get_global:
+; LOCAL: .LBB{{[0-9]+}}_2:
+; LOCAL-NEXT: lea %s34, x@tpoff_lo
+; LOCAL-NEXT: and %s34, %s34, (32)0
+; LOCAL-NEXT: lea.sl %s34, x@tpoff_hi(%s34)
+; LOCAL-NEXT: adds.l %s0, %s14, %s34
+; LOCAL-NEXT: or %s11, 0, %s9
+entry:
+ ret i32* @x
+}
+
+; Function Attrs: norecurse nounwind readnone
+define nonnull i32* @get_local() {
+; GENDYN-LABEL: get_local:
+; GENDYN: # %bb.0: # %entry
+; GENDYN-NEXT: st %s9, (,%s11)
+; GENDYN-NEXT: st %s10, 8(,%s11)
+; GENDYN-NEXT: st %s15, 24(,%s11)
+; GENDYN-NEXT: st %s16, 32(,%s11)
+; GENDYN-NEXT: or %s9, 0, %s11
+; GENDYN-NEXT: lea %s13, -240
+; GENDYN-NEXT: and %s13, %s13, (32)0
+; GENDYN-NEXT: lea.sl %s11, -1(%s11, %s13)
+; GENDYN-NEXT: brge.l %s11, %s8, .LBB1_2
+; GENDYN-NEXT: # %bb.1: # %entry
+; GENDYN-NEXT: ld %s61, 24(,%s14)
+; GENDYN-NEXT: or %s62, 0, %s0
+; GENDYN-NEXT: lea %s63, 315
+; GENDYN-NEXT: shm.l %s63, (%s61)
+; GENDYN-NEXT: shm.l %s8, 8(%s61)
+; GENDYN-NEXT: shm.l %s11, 16(%s61)
+; GENDYN-NEXT: monc
+; GENDYN-NEXT: or %s0, 0, %s62
+; GENDYN-NEXT: .LBB1_2: # %entry
+; GENDYN-NEXT: lea %s0, y@tls_gd_lo(-24)
+; GENDYN-NEXT: and %s0, %s0, (32)0
+; GENDYN-NEXT: sic %s10
+; GENDYN-NEXT: lea.sl %s0, y@tls_gd_hi(%s10, %s0)
+; GENDYN-NEXT: lea %s12, __tls_get_addr@plt_lo(8)
+; GENDYN-NEXT: and %s12, %s12, (32)0
+; GENDYN-NEXT: lea.sl %s12, __tls_get_addr@plt_hi(%s10, %s12)
+; GENDYN-NEXT: bsic %s10, (, %s12)
+; GENDYN-NEXT: or %s11, 0, %s9
+; GENDYN-NEXT: ld %s16, 32(,%s11)
+; GENDYN-NEXT: ld %s15, 24(,%s11)
+; GENDYN-NEXT: ld %s10, 8(,%s11)
+; GENDYN-NEXT: ld %s9, (,%s11)
+; GENDYN-NEXT: b.l (,%lr)
+;
+; GENDYNPIC-LABEL: get_local:
+; GENDYNPIC: # %bb.0: # %entry
+; GENDYNPIC-NEXT: st %s9, (,%s11)
+; GENDYNPIC-NEXT: st %s10, 8(,%s11)
+; GENDYNPIC-NEXT: st %s15, 24(,%s11)
+; GENDYNPIC-NEXT: st %s16, 32(,%s11)
+; GENDYNPIC-NEXT: or %s9, 0, %s11
+; GENDYNPIC-NEXT: lea %s13, -240
+; GENDYNPIC-NEXT: and %s13, %s13, (32)0
+; GENDYNPIC-NEXT: lea.sl %s11, -1(%s11, %s13)
+; GENDYNPIC-NEXT: brge.l %s11, %s8, .LBB1_2
+; GENDYNPIC-NEXT: # %bb.1: # %entry
+; GENDYNPIC-NEXT: ld %s61, 24(,%s14)
+; GENDYNPIC-NEXT: or %s62, 0, %s0
+; GENDYNPIC-NEXT: lea %s63, 315
+; GENDYNPIC-NEXT: shm.l %s63, (%s61)
+; GENDYNPIC-NEXT: shm.l %s8, 8(%s61)
+; GENDYNPIC-NEXT: shm.l %s11, 16(%s61)
+; GENDYNPIC-NEXT: monc
+; GENDYNPIC-NEXT: or %s0, 0, %s62
+; GENDYNPIC-NEXT: .LBB1_2: # %entry
+; GENDYNPIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24)
+; GENDYNPIC-NEXT: and %s15, %s15, (32)0
+; GENDYNPIC-NEXT: sic %s16
+; GENDYNPIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
+; GENDYNPIC-NEXT: lea %s0, y@tls_gd_lo(-24)
+; GENDYNPIC-NEXT: and %s0, %s0, (32)0
+; GENDYNPIC-NEXT: sic %s10
+; GENDYNPIC-NEXT: lea.sl %s0, y@tls_gd_hi(%s10, %s0)
+; GENDYNPIC-NEXT: lea %s12, __tls_get_addr@plt_lo(8)
+; GENDYNPIC-NEXT: and %s12, %s12, (32)0
+; GENDYNPIC-NEXT: lea.sl %s12, __tls_get_addr@plt_hi(%s10, %s12)
+; GENDYNPIC-NEXT: bsic %s10, (, %s12)
+; GENDYNPIC-NEXT: or %s11, 0, %s9
+; GENDYNPIC-NEXT: ld %s16, 32(,%s11)
+; GENDYNPIC-NEXT: ld %s15, 24(,%s11)
+; GENDYNPIC-NEXT: ld %s10, 8(,%s11)
+; GENDYNPIC-NEXT: ld %s9, (,%s11)
+; GENDYNPIC-NEXT: b.l (,%lr)
+; LOCAL-LABEL: get_local:
+; LOCAL: .LBB{{[0-9]+}}_2:
+; LOCAL-NEXT: lea %s34, y@tpoff_lo
+; LOCAL-NEXT: and %s34, %s34, (32)0
+; LOCAL-NEXT: lea.sl %s34, y@tpoff_hi(%s34)
+; LOCAL-NEXT: adds.l %s0, %s14, %s34
+; LOCAL-NEXT: or %s11, 0, %s9
+entry:
+ ret i32* @y
+}
+
+; Function Attrs: norecurse nounwind
+define void @set_global(i32 %v) {
+; GENDYN-LABEL: set_global:
+; GENDYN: # %bb.0: # %entry
+; GENDYN-NEXT: st %s9, (,%s11)
+; GENDYN-NEXT: st %s10, 8(,%s11)
+; GENDYN-NEXT: st %s15, 24(,%s11)
+; GENDYN-NEXT: st %s16, 32(,%s11)
+; GENDYN-NEXT: or %s9, 0, %s11
+; GENDYN-NEXT: lea %s13, -240
+; GENDYN-NEXT: and %s13, %s13, (32)0
+; GENDYN-NEXT: lea.sl %s11, -1(%s11, %s13)
+; GENDYN-NEXT: brge.l %s11, %s8, .LBB2_2
+; GENDYN-NEXT: # %bb.1: # %entry
+; GENDYN-NEXT: ld %s61, 24(,%s14)
+; GENDYN-NEXT: or %s62, 0, %s0
+; GENDYN-NEXT: lea %s63, 315
+; GENDYN-NEXT: shm.l %s63, (%s61)
+; GENDYN-NEXT: shm.l %s8, 8(%s61)
+; GENDYN-NEXT: shm.l %s11, 16(%s61)
+; GENDYN-NEXT: monc
+; GENDYN-NEXT: or %s0, 0, %s62
+; GENDYN-NEXT: .LBB2_2: # %entry
+; GENDYN-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
+; GENDYN-NEXT: or %s18, 0, %s0
+; GENDYN-NEXT: lea %s0, x@tls_gd_lo(-24)
+; GENDYN-NEXT: and %s0, %s0, (32)0
+; GENDYN-NEXT: sic %s10
+; GENDYN-NEXT: lea.sl %s0, x@tls_gd_hi(%s10, %s0)
+; GENDYN-NEXT: lea %s12, __tls_get_addr@plt_lo(8)
+; GENDYN-NEXT: and %s12, %s12, (32)0
+; GENDYN-NEXT: lea.sl %s12, __tls_get_addr@plt_hi(%s10, %s12)
+; GENDYN-NEXT: bsic %s10, (, %s12)
+; GENDYN-NEXT: stl %s18, (,%s0)
+; GENDYN-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
+; GENDYN-NEXT: or %s11, 0, %s9
+; GENDYN-NEXT: ld %s16, 32(,%s11)
+; GENDYN-NEXT: ld %s15, 24(,%s11)
+; GENDYN-NEXT: ld %s10, 8(,%s11)
+; GENDYN-NEXT: ld %s9, (,%s11)
+; GENDYN-NEXT: b.l (,%lr)
+;
+; GENDYNPIC-LABEL: set_global:
+; GENDYNPIC: # %bb.0: # %entry
+; GENDYNPIC-NEXT: st %s9, (,%s11)
+; GENDYNPIC-NEXT: st %s10, 8(,%s11)
+; GENDYNPIC-NEXT: st %s15, 24(,%s11)
+; GENDYNPIC-NEXT: st %s16, 32(,%s11)
+; GENDYNPIC-NEXT: or %s9, 0, %s11
+; GENDYNPIC-NEXT: lea %s13, -240
+; GENDYNPIC-NEXT: and %s13, %s13, (32)0
+; GENDYNPIC-NEXT: lea.sl %s11, -1(%s11, %s13)
+; GENDYNPIC-NEXT: brge.l %s11, %s8, .LBB2_2
+; GENDYNPIC-NEXT: # %bb.1: # %entry
+; GENDYNPIC-NEXT: ld %s61, 24(,%s14)
+; GENDYNPIC-NEXT: or %s62, 0, %s0
+; GENDYNPIC-NEXT: lea %s63, 315
+; GENDYNPIC-NEXT: shm.l %s63, (%s61)
+; GENDYNPIC-NEXT: shm.l %s8, 8(%s61)
+; GENDYNPIC-NEXT: shm.l %s11, 16(%s61)
+; GENDYNPIC-NEXT: monc
+; GENDYNPIC-NEXT: or %s0, 0, %s62
+; GENDYNPIC-NEXT: .LBB2_2: # %entry
+; GENDYNPIC-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
+; GENDYNPIC-NEXT: or %s18, 0, %s0
+; GENDYNPIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24)
+; GENDYNPIC-NEXT: and %s15, %s15, (32)0
+; GENDYNPIC-NEXT: sic %s16
+; GENDYNPIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
+; GENDYNPIC-NEXT: lea %s0, x@tls_gd_lo(-24)
+; GENDYNPIC-NEXT: and %s0, %s0, (32)0
+; GENDYNPIC-NEXT: sic %s10
+; GENDYNPIC-NEXT: lea.sl %s0, x@tls_gd_hi(%s10, %s0)
+; GENDYNPIC-NEXT: lea %s12, __tls_get_addr@plt_lo(8)
+; GENDYNPIC-NEXT: and %s12, %s12, (32)0
+; GENDYNPIC-NEXT: lea.sl %s12, __tls_get_addr@plt_hi(%s10, %s12)
+; GENDYNPIC-NEXT: bsic %s10, (, %s12)
+; GENDYNPIC-NEXT: stl %s18, (,%s0)
+; GENDYNPIC-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
+; GENDYNPIC-NEXT: or %s11, 0, %s9
+; GENDYNPIC-NEXT: ld %s16, 32(,%s11)
+; GENDYNPIC-NEXT: ld %s15, 24(,%s11)
+; GENDYNPIC-NEXT: ld %s10, 8(,%s11)
+; GENDYNPIC-NEXT: ld %s9, (,%s11)
+; GENDYNPIC-NEXT: b.l (,%lr)
+; LOCAL-LABEL: set_global:
+; LOCAL: .LBB{{[0-9]+}}_2:
+; LOCAL-NEXT: lea %s34, x@tpoff_lo
+; LOCAL-NEXT: and %s34, %s34, (32)0
+; LOCAL-NEXT: lea.sl %s34, x@tpoff_hi(%s34)
+; LOCAL-NEXT: adds.l %s34, %s14, %s34
+; LOCAL-NEXT: stl %s0, (,%s34)
+; LOCAL-NEXT: or %s11, 0, %s9
+entry:
+ store i32 %v, i32* @x, align 4
+ ret void
+}
+
+; Function Attrs: norecurse nounwind
+define void @set_local(i32 %v) {
+; GENDYN-LABEL: set_local:
+; GENDYN: # %bb.0: # %entry
+; GENDYN-NEXT: st %s9, (,%s11)
+; GENDYN-NEXT: st %s10, 8(,%s11)
+; GENDYN-NEXT: st %s15, 24(,%s11)
+; GENDYN-NEXT: st %s16, 32(,%s11)
+; GENDYN-NEXT: or %s9, 0, %s11
+; GENDYN-NEXT: lea %s13, -240
+; GENDYN-NEXT: and %s13, %s13, (32)0
+; GENDYN-NEXT: lea.sl %s11, -1(%s11, %s13)
+; GENDYN-NEXT: brge.l %s11, %s8, .LBB3_2
+; GENDYN-NEXT: # %bb.1: # %entry
+; GENDYN-NEXT: ld %s61, 24(,%s14)
+; GENDYN-NEXT: or %s62, 0, %s0
+; GENDYN-NEXT: lea %s63, 315
+; GENDYN-NEXT: shm.l %s63, (%s61)
+; GENDYN-NEXT: shm.l %s8, 8(%s61)
+; GENDYN-NEXT: shm.l %s11, 16(%s61)
+; GENDYN-NEXT: monc
+; GENDYN-NEXT: or %s0, 0, %s62
+; GENDYN-NEXT: .LBB3_2: # %entry
+; GENDYN-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
+; GENDYN-NEXT: or %s18, 0, %s0
+; GENDYN-NEXT: lea %s0, y@tls_gd_lo(-24)
+; GENDYN-NEXT: and %s0, %s0, (32)0
+; GENDYN-NEXT: sic %s10
+; GENDYN-NEXT: lea.sl %s0, y@tls_gd_hi(%s10, %s0)
+; GENDYN-NEXT: lea %s12, __tls_get_addr@plt_lo(8)
+; GENDYN-NEXT: and %s12, %s12, (32)0
+; GENDYN-NEXT: lea.sl %s12, __tls_get_addr@plt_hi(%s10, %s12)
+; GENDYN-NEXT: bsic %s10, (, %s12)
+; GENDYN-NEXT: stl %s18, (,%s0)
+; GENDYN-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
+; GENDYN-NEXT: or %s11, 0, %s9
+; GENDYN-NEXT: ld %s16, 32(,%s11)
+; GENDYN-NEXT: ld %s15, 24(,%s11)
+; GENDYN-NEXT: ld %s10, 8(,%s11)
+; GENDYN-NEXT: ld %s9, (,%s11)
+; GENDYN-NEXT: b.l (,%lr)
+;
+; GENDYNPIC-LABEL: set_local:
+; GENDYNPIC: # %bb.0: # %entry
+; GENDYNPIC-NEXT: st %s9, (,%s11)
+; GENDYNPIC-NEXT: st %s10, 8(,%s11)
+; GENDYNPIC-NEXT: st %s15, 24(,%s11)
+; GENDYNPIC-NEXT: st %s16, 32(,%s11)
+; GENDYNPIC-NEXT: or %s9, 0, %s11
+; GENDYNPIC-NEXT: lea %s13, -240
+; GENDYNPIC-NEXT: and %s13, %s13, (32)0
+; GENDYNPIC-NEXT: lea.sl %s11, -1(%s11, %s13)
+; GENDYNPIC-NEXT: brge.l %s11, %s8, .LBB3_2
+; GENDYNPIC-NEXT: # %bb.1: # %entry
+; GENDYNPIC-NEXT: ld %s61, 24(,%s14)
+; GENDYNPIC-NEXT: or %s62, 0, %s0
+; GENDYNPIC-NEXT: lea %s63, 315
+; GENDYNPIC-NEXT: shm.l %s63, (%s61)
+; GENDYNPIC-NEXT: shm.l %s8, 8(%s61)
+; GENDYNPIC-NEXT: shm.l %s11, 16(%s61)
+; GENDYNPIC-NEXT: monc
+; GENDYNPIC-NEXT: or %s0, 0, %s62
+; GENDYNPIC-NEXT: .LBB3_2: # %entry
+; GENDYNPIC-NEXT: st %s18, 48(,%s9) # 8-byte Folded Spill
+; GENDYNPIC-NEXT: or %s18, 0, %s0
+; GENDYNPIC-NEXT: lea %s15, _GLOBAL_OFFSET_TABLE_@pc_lo(-24)
+; GENDYNPIC-NEXT: and %s15, %s15, (32)0
+; GENDYNPIC-NEXT: sic %s16
+; GENDYNPIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15)
+; GENDYNPIC-NEXT: lea %s0, y@tls_gd_lo(-24)
+; GENDYNPIC-NEXT: and %s0, %s0, (32)0
+; GENDYNPIC-NEXT: sic %s10
+; GENDYNPIC-NEXT: lea.sl %s0, y@tls_gd_hi(%s10, %s0)
+; GENDYNPIC-NEXT: lea %s12, __tls_get_addr@plt_lo(8)
+; GENDYNPIC-NEXT: and %s12, %s12, (32)0
+; GENDYNPIC-NEXT: lea.sl %s12, __tls_get_addr@plt_hi(%s10, %s12)
+; GENDYNPIC-NEXT: bsic %s10, (, %s12)
+; GENDYNPIC-NEXT: stl %s18, (,%s0)
+; GENDYNPIC-NEXT: ld %s18, 48(,%s9) # 8-byte Folded Reload
+; GENDYNPIC-NEXT: or %s11, 0, %s9
+; GENDYNPIC-NEXT: ld %s16, 32(,%s11)
+; GENDYNPIC-NEXT: ld %s15, 24(,%s11)
+; GENDYNPIC-NEXT: ld %s10, 8(,%s11)
+; GENDYNPIC-NEXT: ld %s9, (,%s11)
+; GENDYNPIC-NEXT: b.l (,%lr)
+; LOCAL-LABEL: set_local:
+; LOCAL: .LBB{{[0-9]+}}_2:
+; LOCAL-NEXT: lea %s34, y@tpoff_lo
+; LOCAL-NEXT: and %s34, %s34, (32)0
+; LOCAL-NEXT: lea.sl %s34, y@tpoff_hi(%s34)
+; LOCAL-NEXT: adds.l %s34, %s14, %s34
+; LOCAL-NEXT: stl %s0, (,%s34)
+; LOCAL-NEXT: or %s11, 0, %s9
+entry:
+ store i32 %v, i32* @y, align 4
+ ret void
+}
More information about the llvm-commits
mailing list