[llvm] 82bb8a5 - [CSKY] Add codegen support of GlobalTLSAddress lowering
Zi Xuan Wu via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 20 22:43:46 PST 2022
Author: Zi Xuan Wu
Date: 2022-01-21T14:39:55+08:00
New Revision: 82bb8a588ddea104faadb54366e3d42b5857fc06
URL: https://github.com/llvm/llvm-project/commit/82bb8a588ddea104faadb54366e3d42b5857fc06
DIFF: https://github.com/llvm/llvm-project/commit/82bb8a588ddea104faadb54366e3d42b5857fc06.diff
LOG: [CSKY] Add codegen support of GlobalTLSAddress lowering
TLS addresses are lowered in the DAG stage either statically or dynamically, depending on the TLS model.
The lowering uses the PseudoTLSLA32 pseudo instruction to get the address of the TLS-related entry, which resides in the constant pool.
Added:
llvm/test/CodeGen/CSKY/tls-models.ll
Modified:
llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
llvm/lib/Target/CSKY/CSKYAsmPrinter.h
llvm/lib/Target/CSKY/CSKYISelLowering.cpp
llvm/lib/Target/CSKY/CSKYISelLowering.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
index cfe97b971c53d..c8269eeacfdb2 100644
--- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
@@ -58,6 +58,33 @@ void CSKYAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
// instructions) auto-generated.
#include "CSKYGenMCPseudoLowering.inc"
+void CSKYAsmPrinter::expandTLSLA(const MachineInstr *MI) {
+ const CSKYInstrInfo *TII = Subtarget->getInstrInfo();
+ // Expand MI into three emitted items: a PC label, an LRW32 and a GRS32.
+ DebugLoc DL = MI->getDebugLoc();
+ // The label is named <private prefix>PC<function number>_<operand 3 imm>.
+ MCSymbol *PCLabel = OutContext.getOrCreateSymbol(
+ Twine(MAI->getPrivateGlobalPrefix()) + "PC" + Twine(getFunctionNumber()) +
+ "_" + Twine(MI->getOperand(3).getImm()));
+ // Emit the label first so that it sits directly before the LRW32.
+ OutStreamer->emitLabel(PCLabel);
+ // LRW32 is built from operand 0 and operand 2 of MI.
+ auto Instr = BuildMI(*MF, DL, TII->get(CSKY::LRW32))
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(2));
+ MCInst LRWInst;
+ MCInstLowering.Lower(Instr, LRWInst);
+ EmitToStreamer(*OutStreamer, LRWInst);
+ // GRS32 is built from operand 1 of MI and the PC label emitted above.
+ Instr = BuildMI(*MF, DL, TII->get(CSKY::GRS32))
+ .add(MI->getOperand(1))
+ .addSym(PCLabel);
+ MCInst GRSInst;
+ MCInstLowering.Lower(Instr, GRSInst);
+ EmitToStreamer(*OutStreamer, GRSInst);
+ return;
+}
+
void CSKYAsmPrinter::emitCustomConstantPool(const MachineInstr *MI) {
// This instruction represents a floating constant pool in the function.
@@ -102,6 +129,9 @@ void CSKYAsmPrinter::emitInstruction(const MachineInstr *MI) {
InConstantPool = false;
}
+ if (MI->getOpcode() == CSKY::PseudoTLSLA32)
+ return expandTLSLA(MI);
+
if (MI->getOpcode() == CSKY::CONSTPOOL_ENTRY)
return emitCustomConstantPool(MI);
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
index 64c94f08eae4e..04a253d349c80 100644
--- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
@@ -26,6 +26,7 @@ class LLVM_LIBRARY_VISIBILITY CSKYAsmPrinter : public AsmPrinter {
/// MachineFunction.
MachineConstantPool *MCP;
+ void expandTLSLA(const MachineInstr *MI);
void emitCustomConstantPool(const MachineInstr *MI);
public:
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
index b253e10574a74..c4d5d687216d1 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -119,6 +119,8 @@ SDValue CSKYTargetLowering::LowerOperation(SDValue Op,
return LowerGlobalAddress(Op, DAG);
case ISD::ExternalSymbol:
return LowerExternalSymbol(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ return LowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable:
return LowerJumpTable(Op, DAG);
case ISD::BlockAddress:
@@ -1005,3 +1007,116 @@ Register CSKYTargetLowering::getExceptionSelectorRegister(
const Constant *PersonalityFn) const {
return CSKY::R1;
}
+
+SDValue CSKYTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const { // Custom lowering for ISD::GlobalTLSAddress.
+ SDLoc DL(Op);
+ EVT Ty = Op.getValueType();
+ GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
+ int64_t Offset = N->getOffset();
+ MVT XLenVT = MVT::i32;
+ // The TLS model of the global selects between the static and dynamic paths.
+ TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
+ SDValue Addr;
+ switch (Model) { // No default case; only the four models below are handled.
+ case TLSModel::LocalExec:
+ Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
+ break;
+ case TLSModel::InitialExec:
+ Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
+ break;
+ case TLSModel::LocalDynamic:
+ case TLSModel::GeneralDynamic:
+ Addr = getDynamicTLSAddr(N, DAG); // Both dynamic models share one path.
+ break;
+ }
+ // Addr is built from the global alone; N's offset is applied only below.
+ // In order to maximise the opportunity for common subexpression elimination,
+ // emit a separate ADD node for the global address offset instead of folding
+ // it in the global address node. Later peephole optimisations may choose to
+ // fold it back in when profitable.
+ if (Offset != 0)
+ return DAG.getNode(ISD::ADD, DL, Ty, Addr,
+ DAG.getConstant(Offset, DL, XLenVT));
+ return Addr;
+}
+
+SDValue CSKYTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
+ SelectionDAG &DAG,
+ bool UseGOT) const { // LocalExec (UseGOT=false) / InitialExec (UseGOT=true).
+ MachineFunction &MF = DAG.getMachineFunction();
+ CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();
+ // Fresh label id, shared by the constant-pool value and PseudoTLSLA32.
+ unsigned CSKYPCLabelIndex = CFI->createPICLabelUId();
+
+ SDLoc DL(N);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ // UseGOT: TLSIE, AddCurrentAddr set, PC adjust 4. Otherwise: TLSLE, no adjust.
+ CSKYCP::CSKYCPModifier Flag = UseGOT ? CSKYCP::TLSIE : CSKYCP::TLSLE;
+ bool AddCurrentAddr = UseGOT ? true : false;
+ unsigned char PCAjust = UseGOT ? 4 : 0;
+
+ CSKYConstantPoolValue *CPV =
+ CSKYConstantPoolConstant::Create(N->getGlobal(), CSKYCP::CPValue, PCAjust,
+ Flag, AddCurrentAddr, CSKYPCLabelIndex);
+ SDValue CAddr = DAG.getTargetConstantPool(CPV, Ty);
+ // UseGOT: load through the sum of PseudoTLSLA32's two results. Else: LRW32.
+ SDValue Load;
+ if (UseGOT) {
+ SDValue PICLabel = DAG.getTargetConstant(CSKYPCLabelIndex, DL, MVT::i32);
+ auto *LRWGRS = DAG.getMachineNode(CSKY::PseudoTLSLA32, DL, {Ty, Ty},
+ {CAddr, PICLabel});
+ auto LRWADDGRS =
+ DAG.getNode(ISD::ADD, DL, Ty, SDValue(LRWGRS, 0), SDValue(LRWGRS, 1));
+ Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), LRWADDGRS,
+ MachinePointerInfo(N->getGlobal()));
+ } else {
+ Load = SDValue(DAG.getMachineNode(CSKY::LRW32, DL, Ty, CAddr), 0);
+ }
+
+ // Add the thread pointer.
+ SDValue TPReg = DAG.getRegister(CSKY::R31, MVT::i32);
+ return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
+}
+
+SDValue CSKYTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
+ SelectionDAG &DAG) const { // LocalDynamic and GeneralDynamic.
+ MachineFunction &MF = DAG.getMachineFunction();
+ CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();
+ // Fresh label id, shared by the constant-pool value and PseudoTLSLA32.
+ unsigned CSKYPCLabelIndex = CFI->createPICLabelUId();
+
+ SDLoc DL(N);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
+ // TLSGD constant-pool value with AddCurrentAddr set and a PC adjust of 4.
+ CSKYConstantPoolValue *CPV =
+ CSKYConstantPoolConstant::Create(N->getGlobal(), CSKYCP::CPValue, 4,
+ CSKYCP::TLSGD, true, CSKYPCLabelIndex);
+ SDValue Addr = DAG.getTargetConstantPool(CPV, Ty);
+ SDValue PICLabel = DAG.getTargetConstant(CSKYPCLabelIndex, DL, MVT::i32);
+ // PseudoTLSLA32 yields two values; their sum is the __tls_get_addr argument.
+ auto *LRWGRS =
+ DAG.getMachineNode(CSKY::PseudoTLSLA32, DL, {Ty, Ty}, {Addr, PICLabel});
+
+ auto Load =
+ DAG.getNode(ISD::ADD, DL, Ty, SDValue(LRWGRS, 0), SDValue(LRWGRS, 1));
+
+ // Prepare argument list to generate call.
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Load;
+ Entry.Ty = CallTy;
+ Args.push_back(Entry);
+
+ // Setup call to __tls_get_addr.
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(DL)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(CallingConv::C, CallTy,
+ DAG.getExternalSymbol("__tls_get_addr", Ty),
+ std::move(Args));
+ SDValue V = LowerCallTo(CLI).first;
+ // Only .first of LowerCallTo's result (the call's return value) is kept.
+ return V;
+}
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.h b/llvm/lib/Target/CSKY/CSKYISelLowering.h
index 3b3218b015e8f..e1744d5ce2203 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.h
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.h
@@ -154,6 +154,10 @@ class CSKYTargetLowering : public TargetLowering {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
+ bool UseGOT) const;
+ SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
+
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg) const;
};
diff --git a/llvm/test/CodeGen/CSKY/tls-models.ll b/llvm/test/CodeGen/CSKY/tls-models.ll
new file mode 100644
index 0000000000000..35dca36f515b2
--- /dev/null
+++ b/llvm/test/CodeGen/CSKY/tls-models.ll
@@ -0,0 +1,179 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=csky -csky-no-aliases -relocation-model=pic -mattr=+2e3 < %s \
+; RUN: | FileCheck -check-prefix=CSKY-PIC %s
+; RUN: llc -mtriple=csky -csky-no-aliases -mattr=+2e3 < %s | FileCheck -check-prefix=CSKY-NOPIC %s
+
+; Check that TLS symbols are lowered correctly based on the specified
+; model. Make sure they're external to avoid them all being optimised to Local
+; Exec for the executable.
+
+@unspecified = external thread_local global i32
+@ld = external thread_local(localdynamic) global i32
+@ie = external thread_local(initialexec) global i32
+@le = external thread_local(localexec) global i32
+
+
+; No model specified
+
+define i32* @f1() nounwind {
+; CSKY-PIC-LABEL: f1:
+; CSKY-PIC: # %bb.0: # %entry
+; CSKY-PIC-NEXT: subi16 sp, sp, 8
+; CSKY-PIC-NEXT: st32.w rgb, (sp, 4) # 4-byte Folded Spill
+; CSKY-PIC-NEXT: st32.w lr, (sp, 0) # 4-byte Folded Spill
+; CSKY-PIC-NEXT: lrw32 rgb, [.LCPI0_0]
+; CSKY-PIC-NEXT: .LPC0_1:
+; CSKY-PIC-NEXT: lrw32 a0, [.LCPI0_1]
+; CSKY-PIC-NEXT: grs32 a1, .LPC0_1
+; CSKY-PIC-NEXT: addu16 a0, a1
+; CSKY-PIC-NEXT: lrw32 a1, [.LCPI0_2]
+; CSKY-PIC-NEXT: ldr32.w a1, (rgb, a1 << 0)
+; CSKY-PIC-NEXT: jsr16 a1
+; CSKY-PIC-NEXT: ld32.w lr, (sp, 0) # 4-byte Folded Reload
+; CSKY-PIC-NEXT: ld32.w rgb, (sp, 4) # 4-byte Folded Reload
+; CSKY-PIC-NEXT: addi16 sp, sp, 8
+; CSKY-PIC-NEXT: rts16
+; CSKY-PIC-NEXT: .p2align 1
+; CSKY-PIC-NEXT: # %bb.1:
+; CSKY-PIC-NEXT: .p2align 2
+; CSKY-PIC-NEXT: .LCPI0_0:
+; CSKY-PIC-NEXT: .long _GLOBAL_OFFSET_TABLE_
+; CSKY-PIC-NEXT: .LCPI0_1:
+; CSKY-PIC-NEXT: .Ltmp0:
+; CSKY-PIC-NEXT: .long unspecified-(.LPC0_1-.Ltmp0)@TLSGD32
+; CSKY-PIC-NEXT: .LCPI0_2:
+; CSKY-PIC-NEXT: .long __tls_get_addr@PLT
+;
+; CSKY-NOPIC-LABEL: f1:
+; CSKY-NOPIC: # %bb.0: # %entry
+; CSKY-NOPIC-NEXT: .LPC0_1:
+; CSKY-NOPIC-NEXT: lrw32 a0, [.LCPI0_0]
+; CSKY-NOPIC-NEXT: grs32 a1, .LPC0_1
+; CSKY-NOPIC-NEXT: ldr32.w a0, (a0, a1 << 0)
+; CSKY-NOPIC-NEXT: addu32 a0, a0, tls
+; CSKY-NOPIC-NEXT: rts16
+; CSKY-NOPIC-NEXT: .p2align 1
+; CSKY-NOPIC-NEXT: # %bb.1:
+; CSKY-NOPIC-NEXT: .p2align 2
+; CSKY-NOPIC-NEXT: .LCPI0_0:
+; CSKY-NOPIC-NEXT: .Ltmp0:
+; CSKY-NOPIC-NEXT: .long unspecified-(.LPC0_1-.Ltmp0)@GOTTPOFF
+entry:
+ ret i32* @unspecified
+}
+
+
+; localdynamic specified
+
+define i32* @f2() nounwind {
+; CSKY-PIC-LABEL: f2:
+; CSKY-PIC: # %bb.0: # %entry
+; CSKY-PIC-NEXT: subi16 sp, sp, 8
+; CSKY-PIC-NEXT: st32.w rgb, (sp, 4) # 4-byte Folded Spill
+; CSKY-PIC-NEXT: st32.w lr, (sp, 0) # 4-byte Folded Spill
+; CSKY-PIC-NEXT: lrw32 rgb, [.LCPI1_0]
+; CSKY-PIC-NEXT: .LPC1_1:
+; CSKY-PIC-NEXT: lrw32 a0, [.LCPI1_1]
+; CSKY-PIC-NEXT: grs32 a1, .LPC1_1
+; CSKY-PIC-NEXT: addu16 a0, a1
+; CSKY-PIC-NEXT: lrw32 a1, [.LCPI1_2]
+; CSKY-PIC-NEXT: ldr32.w a1, (rgb, a1 << 0)
+; CSKY-PIC-NEXT: jsr16 a1
+; CSKY-PIC-NEXT: ld32.w lr, (sp, 0) # 4-byte Folded Reload
+; CSKY-PIC-NEXT: ld32.w rgb, (sp, 4) # 4-byte Folded Reload
+; CSKY-PIC-NEXT: addi16 sp, sp, 8
+; CSKY-PIC-NEXT: rts16
+; CSKY-PIC-NEXT: .p2align 1
+; CSKY-PIC-NEXT: # %bb.1:
+; CSKY-PIC-NEXT: .p2align 2
+; CSKY-PIC-NEXT: .LCPI1_0:
+; CSKY-PIC-NEXT: .long _GLOBAL_OFFSET_TABLE_
+; CSKY-PIC-NEXT: .LCPI1_1:
+; CSKY-PIC-NEXT: .Ltmp1:
+; CSKY-PIC-NEXT: .long ld-(.LPC1_1-.Ltmp1)@TLSGD32
+; CSKY-PIC-NEXT: .LCPI1_2:
+; CSKY-PIC-NEXT: .long __tls_get_addr@PLT
+;
+; CSKY-NOPIC-LABEL: f2:
+; CSKY-NOPIC: # %bb.0: # %entry
+; CSKY-NOPIC-NEXT: .LPC1_1:
+; CSKY-NOPIC-NEXT: lrw32 a0, [.LCPI1_0]
+; CSKY-NOPIC-NEXT: grs32 a1, .LPC1_1
+; CSKY-NOPIC-NEXT: ldr32.w a0, (a0, a1 << 0)
+; CSKY-NOPIC-NEXT: addu32 a0, a0, tls
+; CSKY-NOPIC-NEXT: rts16
+; CSKY-NOPIC-NEXT: .p2align 1
+; CSKY-NOPIC-NEXT: # %bb.1:
+; CSKY-NOPIC-NEXT: .p2align 2
+; CSKY-NOPIC-NEXT: .LCPI1_0:
+; CSKY-NOPIC-NEXT: .Ltmp1:
+; CSKY-NOPIC-NEXT: .long ld-(.LPC1_1-.Ltmp1)@GOTTPOFF
+entry:
+ ret i32* @ld
+}
+
+
+; initialexec specified
+
+define i32* @f3() nounwind {
+; CSKY-PIC-LABEL: f3:
+; CSKY-PIC: # %bb.0: # %entry
+; CSKY-PIC-NEXT: .LPC2_1:
+; CSKY-PIC-NEXT: lrw32 a0, [.LCPI2_0]
+; CSKY-PIC-NEXT: grs32 a1, .LPC2_1
+; CSKY-PIC-NEXT: ldr32.w a0, (a0, a1 << 0)
+; CSKY-PIC-NEXT: addu32 a0, a0, tls
+; CSKY-PIC-NEXT: rts16
+; CSKY-PIC-NEXT: .p2align 1
+; CSKY-PIC-NEXT: # %bb.1:
+; CSKY-PIC-NEXT: .p2align 2
+; CSKY-PIC-NEXT: .LCPI2_0:
+; CSKY-PIC-NEXT: .Ltmp2:
+; CSKY-PIC-NEXT: .long ie-(.LPC2_1-.Ltmp2)@GOTTPOFF
+;
+; CSKY-NOPIC-LABEL: f3:
+; CSKY-NOPIC: # %bb.0: # %entry
+; CSKY-NOPIC-NEXT: .LPC2_1:
+; CSKY-NOPIC-NEXT: lrw32 a0, [.LCPI2_0]
+; CSKY-NOPIC-NEXT: grs32 a1, .LPC2_1
+; CSKY-NOPIC-NEXT: ldr32.w a0, (a0, a1 << 0)
+; CSKY-NOPIC-NEXT: addu32 a0, a0, tls
+; CSKY-NOPIC-NEXT: rts16
+; CSKY-NOPIC-NEXT: .p2align 1
+; CSKY-NOPIC-NEXT: # %bb.1:
+; CSKY-NOPIC-NEXT: .p2align 2
+; CSKY-NOPIC-NEXT: .LCPI2_0:
+; CSKY-NOPIC-NEXT: .Ltmp2:
+; CSKY-NOPIC-NEXT: .long ie-(.LPC2_1-.Ltmp2)@GOTTPOFF
+entry:
+ ret i32* @ie
+}
+
+
+; localexec specified
+
+define i32* @f4() nounwind {
+; CSKY-PIC-LABEL: f4:
+; CSKY-PIC: # %bb.0: # %entry
+; CSKY-PIC-NEXT: lrw32 a0, [.LCPI3_0]
+; CSKY-PIC-NEXT: addu32 a0, a0, tls
+; CSKY-PIC-NEXT: rts16
+; CSKY-PIC-NEXT: .p2align 1
+; CSKY-PIC-NEXT: # %bb.1:
+; CSKY-PIC-NEXT: .p2align 2
+; CSKY-PIC-NEXT: .LCPI3_0:
+; CSKY-PIC-NEXT: .long le@TPOFF
+;
+; CSKY-NOPIC-LABEL: f4:
+; CSKY-NOPIC: # %bb.0: # %entry
+; CSKY-NOPIC-NEXT: lrw32 a0, [.LCPI3_0]
+; CSKY-NOPIC-NEXT: addu32 a0, a0, tls
+; CSKY-NOPIC-NEXT: rts16
+; CSKY-NOPIC-NEXT: .p2align 1
+; CSKY-NOPIC-NEXT: # %bb.1:
+; CSKY-NOPIC-NEXT: .p2align 2
+; CSKY-NOPIC-NEXT: .LCPI3_0:
+; CSKY-NOPIC-NEXT: .long le@TPOFF
+entry:
+ ret i32* @le
+}
More information about the llvm-commits
mailing list