[clang] [llvm] [X86] Add Support for X86 TLSDESC Relocations (PR #83136)

Phoebe Wang via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 6 02:53:57 PST 2024


https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/83136

>From cdc9ee6c322af0ceed162f3f714bcd0a22e020c3 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Tue, 27 Feb 2024 22:16:38 +0800
Subject: [PATCH 1/6] [X86] Add Support for X86 TLSDESC Relocations

---
 clang/lib/Driver/ToolChains/CommonArgs.cpp    |   3 +-
 clang/test/Driver/tls-dialect.c               |   2 +-
 .../lib/Target/X86/MCTargetDesc/X86BaseInfo.h |  14 ++
 llvm/lib/Target/X86/X86AsmPrinter.cpp         |   2 +
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  98 +++++++----
 llvm/lib/Target/X86/X86MCInstLower.cpp        |  30 +++-
 llvm/test/CodeGen/X86/tls-desc.ll             | 165 ++++++++++++++++++
 7 files changed, 273 insertions(+), 41 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/tls-desc.ll

diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index faceee85a2f8dc..c66e3ee12e50c4 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -740,7 +740,8 @@ bool tools::isTLSDESCEnabled(const ToolChain &TC,
     SupportedArgument = V == "desc" || V == "trad";
     EnableTLSDESC = V == "desc";
   } else if (Triple.isX86()) {
-    SupportedArgument = V == "gnu";
+    SupportedArgument = V == "gnu" || V == "gnu2";
+    EnableTLSDESC = V == "gnu2";
   } else {
     Unsupported = true;
   }
diff --git a/clang/test/Driver/tls-dialect.c b/clang/test/Driver/tls-dialect.c
index f73915b28ec2a3..a808dd81531ce7 100644
--- a/clang/test/Driver/tls-dialect.c
+++ b/clang/test/Driver/tls-dialect.c
@@ -2,6 +2,7 @@
 // RUN: %clang -### --target=riscv64-linux -mtls-dialect=trad %s 2>&1 | FileCheck --check-prefix=NODESC %s
 // RUN: %clang -### --target=riscv64-linux %s 2>&1 | FileCheck --check-prefix=NODESC %s
 // RUN: %clang -### --target=x86_64-linux -mtls-dialect=gnu %s 2>&1 | FileCheck --check-prefix=NODESC %s
+// RUN: %clang -### --target=x86_64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=DESC %s
 
 /// Android supports TLSDESC by default on RISC-V
 /// TLSDESC is not on by default in Linux, even on RISC-V, and is covered above
@@ -18,7 +19,6 @@
 
 /// Unsupported argument
 // RUN: not %clang -### --target=riscv64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=UNSUPPORTED-ARG %s
-// RUN: not %clang -### --target=x86_64-linux -mtls-dialect=gnu2 %s 2>&1 | FileCheck --check-prefix=UNSUPPORTED-ARG %s
 
 // DESC:       "-cc1" {{.*}}"-enable-tlsdesc"
 // NODESC-NOT: "-enable-tlsdesc"
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 4442b80861b61a..1877550f8c40bb 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -431,6 +431,20 @@ enum TOF {
   /// See 'ELF Handling for Thread-Local Storage' for more details.
   ///    SYMBOL_LABEL @TLSLDM
   MO_TLSLDM,
+  /// MO_TLSCALL - On a symbol operand this indicates that the immediate is
+  /// the index of the TLS descriptor function for the symbol. Used in both
+  /// the IA32 and x86-64 local dynamic TLS access model.
+  /// See 'RFC-TLSDESC-x86' for more details.
+  ///    SYMBOL_LABEL @TLSCALL
+  MO_TLSCALL,
+  /// MO_TLSDESC - On a symbol operand this indicates that the immediate is
+  /// the index of the TLS descriptor argument for the symbol. When this
+  /// argument is passed to a call getting from index at TLSCALL, the function will
+  /// return the offset for the symbol. Used in both the IA32 and x86-64 local
+  /// dynamic TLS access model.
+  /// See 'RFC-TLSDESC-x86' for more details.
+  ///    SYMBOL_LABEL @TLSDESC
+  MO_TLSDESC,
   /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
   /// the offset of the GOT entry with the thread-pointer offset for the
   /// symbol. Used in the x86-64 initial exec TLS access model.
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 3395a13545e454..d8e111db1cec42 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -271,6 +271,8 @@ void X86AsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
   case X86II::MO_TLSGD:     O << "@TLSGD";     break;
   case X86II::MO_TLSLD:     O << "@TLSLD";     break;
   case X86II::MO_TLSLDM:    O << "@TLSLDM";    break;
+  case X86II::MO_TLSDESC:   O << "@TLSDESC";   break;
+  case X86II::MO_TLSCALL:   O << "@TLSCALL";   break;
   case X86II::MO_GOTTPOFF:  O << "@GOTTPOFF";  break;
   case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
   case X86II::MO_TPOFF:     O << "@TPOFF";     break;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a86f13135173b0..88314bcf510e9a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18515,17 +18515,17 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
   return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
 }
 
-static SDValue
-GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
-           SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg,
-           unsigned char OperandFlags, bool LocalDynamic = false) {
+static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain,
+                          GlobalAddressSDNode *GA, SDValue *InGlue,
+                          const EVT PtrVT, unsigned ReturnReg,
+                          unsigned char OperandFlags, bool UseTLSDESC = false,
+                          bool LocalDynamic = false) {
   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   SDLoc dl(GA);
-  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
-                                           GA->getValueType(0),
-                                           GA->getOffset(),
-                                           OperandFlags);
+  SDValue TGA = DAG.getTargetGlobalAddress(
+      GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(),
+      UseTLSDESC ? X86II::MO_TLSDESC : OperandFlags);
 
   X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
                                            : X86ISD::TLSADDR;
@@ -18543,13 +18543,27 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
   MFI.setHasCalls(true);
 
   SDValue Glue = Chain.getValue(1);
-  return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
+  SDValue Ret = DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
+
+  if (!UseTLSDESC)
+    return Ret;
+
+  const X86Subtarget &Subtarget = DAG.getSubtarget<X86Subtarget>();
+  MVT VT = Subtarget.isTarget64BitLP64() ? MVT::i64 : MVT::i32;
+  unsigned Seg = Subtarget.is64Bit() ? X86AS::FS : X86AS::GS;
+
+  Value *Ptr = Constant::getNullValue(PointerType::get(*DAG.getContext(), Seg));
+  SDValue Offset =
+      DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl),
+                  MachinePointerInfo(Ptr));
+  return DAG.getNode(ISD::ADD, dl, VT, Ret, Offset);
 }
 
 // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
-static SDValue
-LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
-                                const EVT PtrVT) {
+static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA,
+                                               SelectionDAG &DAG,
+                                               const EVT PtrVT,
+                                               bool UseTLSDESC) {
   SDValue InGlue;
   SDLoc dl(GA);  // ? function entry point might be better
   SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
@@ -18557,23 +18571,26 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                                SDLoc(), PtrVT), InGlue);
   InGlue = Chain.getValue(1);
 
-  return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD);
+  return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD,
+                    UseTLSDESC);
 }
 
 // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64
-static SDValue
-LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
-                                const EVT PtrVT) {
-  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
-                    X86::RAX, X86II::MO_TLSGD);
+static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA,
+                                               SelectionDAG &DAG,
+                                               const EVT PtrVT,
+                                               bool UseTLSDESC) {
+  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX,
+                    X86II::MO_TLSGD, UseTLSDESC);
 }
 
 // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32
-static SDValue
-LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
-                                 const EVT PtrVT) {
-  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
-                    X86::EAX, X86II::MO_TLSGD);
+static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA,
+                                                SelectionDAG &DAG,
+                                                const EVT PtrVT,
+                                                bool UseTLSDESC) {
+  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::EAX,
+                    X86II::MO_TLSGD, UseTLSDESC);
 }
 
 static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
@@ -18590,14 +18607,16 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
   if (Is64Bit) {
     unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
     Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
-                      X86II::MO_TLSLD, /*LocalDynamic=*/true);
+                      X86II::MO_TLSLD, /*UseTLSDESC=*/false,
+                      /*LocalDynamic=*/true);
   } else {
     SDValue InGlue;
     SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
         DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue);
     InGlue = Chain.getValue(1);
     Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
-                      X86II::MO_TLSLDM, /*LocalDynamic=*/true);
+                      X86II::MO_TLSLDM, /*UseTLSDESC=*/false,
+                      /*LocalDynamic=*/true);
   }
 
   // Note: the CleanupLocalDynamicTLSPass will remove redundant computations
@@ -18684,21 +18703,26 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
 
   if (Subtarget.isTargetELF()) {
     TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
+    bool UseTLSDESC = DAG.getTarget().useTLSDESC();
     switch (model) {
-      case TLSModel::GeneralDynamic:
-        if (Subtarget.is64Bit()) {
-          if (Subtarget.isTarget64BitLP64())
-            return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
-          return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT);
-        }
-        return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
-      case TLSModel::LocalDynamic:
+    case TLSModel::LocalDynamic:
+      if (!UseTLSDESC)
         return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(),
                                            Subtarget.isTarget64BitLP64());
-      case TLSModel::InitialExec:
-      case TLSModel::LocalExec:
-        return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
-                                   PositionIndependent);
+      [[fallthrough]];
+    case TLSModel::GeneralDynamic:
+      if (Subtarget.is64Bit()) {
+        if (Subtarget.isTarget64BitLP64()) {
+          // auto PtrVT = getPointerTy(DAG.getDataLayout(), X86AS::FS);
+          return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT, UseTLSDESC);
+        }
+        return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT, UseTLSDESC);
+      }
+      return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT, UseTLSDESC);
+    case TLSModel::InitialExec:
+    case TLSModel::LocalExec:
+      return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
+                                 PositionIndependent);
     }
     llvm_unreachable("Unknown TLS model.");
   }
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index d3b7d97a83caf0..e447e17c2d7d09 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -257,6 +257,12 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
   case X86II::MO_TLSLDM:
     RefKind = MCSymbolRefExpr::VK_TLSLDM;
     break;
+  case X86II::MO_TLSDESC:
+    RefKind = MCSymbolRefExpr::VK_TLSDESC;
+    break;
+  case X86II::MO_TLSCALL:
+    RefKind = MCSymbolRefExpr::VK_TLSCALL;
+    break;
   case X86II::MO_GOTTPOFF:
     RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
     break;
@@ -524,13 +530,14 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
   bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
                       MI.getOpcode() == X86::TLS_base_addr64;
   MCContext &Ctx = OutStreamer->getContext();
+  bool isTLSDESC = MI.getOperand(3).getTargetFlags() == X86II::MO_TLSDESC;
 
   MCSymbolRefExpr::VariantKind SRVK;
   switch (MI.getOpcode()) {
   case X86::TLS_addr32:
   case X86::TLS_addr64:
   case X86::TLS_addrX32:
-    SRVK = MCSymbolRefExpr::VK_TLSGD;
+    SRVK = isTLSDESC ? MCSymbolRefExpr::VK_TLSDESC : MCSymbolRefExpr::VK_TLSGD;
     break;
   case X86::TLS_base_addr32:
     SRVK = MCSymbolRefExpr::VK_TLSLDM;
@@ -554,7 +561,26 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
   bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
                 Ctx.getAsmInfo()->canRelaxRelocations();
 
-  if (Is64Bits) {
+  if (isTLSDESC) {
+    const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(
+        MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)),
+        MCSymbolRefExpr::VK_TLSCALL, Ctx);
+    EmitAndCountInstruction(
+        MCInstBuilder(Is64BitsLP64 ? X86::LEA64r : X86::LEA32r)
+            .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
+            .addReg(Is64Bits ? X86::RIP : X86::EBX)
+            .addImm(1)
+            .addReg(0)
+            .addExpr(Sym)
+            .addReg(0));
+    EmitAndCountInstruction(
+        MCInstBuilder(Is64Bits ? X86::CALL64m : X86::CALL32m)
+            .addReg(Is64BitsLP64 ? X86::RAX : X86::EAX)
+            .addImm(1)
+            .addReg(0)
+            .addExpr(Expr)
+            .addReg(0));
+  } else if (Is64Bits) {
     bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
     if (NeedsPadding && Is64BitsLP64)
       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll
new file mode 100644
index 00000000000000..ed363df6f78a43
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tls-desc.ll
@@ -0,0 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=i686-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X64
+
+ at x = thread_local global i32 0, align 4
+ at y = internal thread_local global i32 0, align 4
+
+define ptr @f1() nounwind {
+; X86-LABEL: f1:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    calll .L0$pb
+; X86-NEXT:  .L0$pb:
+; X86-NEXT:    popl %ebx
+; X86-NEXT:  .Ltmp0:
+; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx
+; X86-NEXT:    leal x at tlsdesc(%ebx), %eax
+; X86-NEXT:    calll *x at tlscall(%eax)
+; X86-NEXT:    addl %gs:0, %eax
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl
+;
+; X32-LABEL: f1:
+; X32:       # %bb.0:
+; X32-NEXT:    pushq %rax
+; X32-NEXT:    leal x at tlsdesc(%rip), %eax
+; X32-NEXT:    callq *x at tlscall(%eax)
+; X32-NEXT:    # kill: def $eax killed $eax def $rax
+; X32-NEXT:    addl %fs:0, %eax
+; X32-NEXT:    popq %rcx
+; X32-NEXT:    retq
+;
+; X64-LABEL: f1:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    leaq x at tlsdesc(%rip), %rax
+; X64-NEXT:    callq *x at tlscall(%rax)
+; X64-NEXT:    addq %fs:0, %rax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+  ret ptr %1
+}
+
+define i32 @f2() nounwind {
+; X86-LABEL: f2:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    calll .L1$pb
+; X86-NEXT:  .L1$pb:
+; X86-NEXT:    popl %ebx
+; X86-NEXT:  .Ltmp1:
+; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.L1$pb), %ebx
+; X86-NEXT:    movl %gs:0, %esi
+; X86-NEXT:    leal x at tlsdesc(%ebx), %eax
+; X86-NEXT:    calll *x at tlscall(%eax)
+; X86-NEXT:    movl (%eax,%esi), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl
+;
+; X32-LABEL: f2:
+; X32:       # %bb.0:
+; X32-NEXT:    pushq %rbx
+; X32-NEXT:    movl %fs:0, %ebx
+; X32-NEXT:    leal x at tlsdesc(%rip), %eax
+; X32-NEXT:    callq *x at tlscall(%eax)
+; X32-NEXT:    movl (%eax,%ebx), %eax
+; X32-NEXT:    popq %rbx
+; X32-NEXT:    retq
+;
+; X64-LABEL: f2:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movq %fs:0, %rbx
+; X64-NEXT:    leaq x at tlsdesc(%rip), %rax
+; X64-NEXT:    callq *x at tlscall(%rax)
+; X64-NEXT:    movl (%rax,%rbx), %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    retq
+  %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+  %2 = load i32, ptr %1
+  ret i32 %2
+}
+
+define ptr @f3() nounwind {
+; X86-LABEL: f3:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    calll .L2$pb
+; X86-NEXT:  .L2$pb:
+; X86-NEXT:    popl %ebx
+; X86-NEXT:  .Ltmp2:
+; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.L2$pb), %ebx
+; X86-NEXT:    leal x at tlsdesc(%ebx), %eax
+; X86-NEXT:    calll *x at tlscall(%eax)
+; X86-NEXT:    addl %gs:0, %eax
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl
+;
+; X32-LABEL: f3:
+; X32:       # %bb.0:
+; X32-NEXT:    pushq %rax
+; X32-NEXT:    leal x at tlsdesc(%rip), %eax
+; X32-NEXT:    callq *x at tlscall(%eax)
+; X32-NEXT:    # kill: def $eax killed $eax def $rax
+; X32-NEXT:    addl %fs:0, %eax
+; X32-NEXT:    popq %rcx
+; X32-NEXT:    retq
+;
+; X64-LABEL: f3:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    leaq x at tlsdesc(%rip), %rax
+; X64-NEXT:    callq *x at tlscall(%rax)
+; X64-NEXT:    addq %fs:0, %rax
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    retq
+  %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+  ret ptr %1
+}
+
+define i32 @f4() nounwind {
+; X86-LABEL: f4:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    calll .L3$pb
+; X86-NEXT:  .L3$pb:
+; X86-NEXT:    popl %ebx
+; X86-NEXT:  .Ltmp3:
+; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L3$pb), %ebx
+; X86-NEXT:    movl %gs:0, %esi
+; X86-NEXT:    leal x at tlsdesc(%ebx), %eax
+; X86-NEXT:    calll *x at tlscall(%eax)
+; X86-NEXT:    movl (%eax,%esi), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    retl
+;
+; X32-LABEL: f4:
+; X32:       # %bb.0:
+; X32-NEXT:    pushq %rbx
+; X32-NEXT:    movl %fs:0, %ebx
+; X32-NEXT:    leal x at tlsdesc(%rip), %eax
+; X32-NEXT:    callq *x at tlscall(%eax)
+; X32-NEXT:    movl (%eax,%ebx), %eax
+; X32-NEXT:    popq %rbx
+; X32-NEXT:    retq
+;
+; X64-LABEL: f4:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movq %fs:0, %rbx
+; X64-NEXT:    leaq x at tlsdesc(%rip), %rax
+; X64-NEXT:    callq *x at tlscall(%rax)
+; X64-NEXT:    movl (%rax,%rbx), %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    retq
+  %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
+  %2 = load i32, ptr %1
+  ret i32 %2
+}

>From 6eb75d6c99c926c055d809207dea918dd3ab4bf5 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Wed, 28 Feb 2024 21:22:00 +0800
Subject: [PATCH 2/6] Address review comments

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 16 +++---
 llvm/test/CodeGen/X86/tls-desc.ll       | 71 ++++++++++++++++++++++---
 2 files changed, 72 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 88314bcf510e9a..1feaad80c8e35a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18515,7 +18515,7 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
   return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
 }
 
-static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain,
+static SDValue getTLSADDR(SelectionDAG &DAG, SDValue Chain,
                           GlobalAddressSDNode *GA, SDValue *InGlue,
                           const EVT PtrVT, unsigned ReturnReg,
                           unsigned char OperandFlags, bool UseTLSDESC = false,
@@ -18571,7 +18571,7 @@ static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA,
                                                SDLoc(), PtrVT), InGlue);
   InGlue = Chain.getValue(1);
 
-  return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD,
+  return getTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD,
                     UseTLSDESC);
 }
 
@@ -18580,7 +18580,7 @@ static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG,
                                                const EVT PtrVT,
                                                bool UseTLSDESC) {
-  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX,
+  return getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX,
                     X86II::MO_TLSGD, UseTLSDESC);
 }
 
@@ -18589,7 +18589,7 @@ static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA,
                                                 SelectionDAG &DAG,
                                                 const EVT PtrVT,
                                                 bool UseTLSDESC) {
-  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::EAX,
+  return getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::EAX,
                     X86II::MO_TLSGD, UseTLSDESC);
 }
 
@@ -18606,7 +18606,7 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
   SDValue Base;
   if (Is64Bit) {
     unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
-    Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
+    Base = getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
                       X86II::MO_TLSLD, /*UseTLSDESC=*/false,
                       /*LocalDynamic=*/true);
   } else {
@@ -18614,7 +18614,7 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
     SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
         DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue);
     InGlue = Chain.getValue(1);
-    Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
+    Base = getTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
                       X86II::MO_TLSLDM, /*UseTLSDESC=*/false,
                       /*LocalDynamic=*/true);
   }
@@ -18712,10 +18712,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
       [[fallthrough]];
     case TLSModel::GeneralDynamic:
       if (Subtarget.is64Bit()) {
-        if (Subtarget.isTarget64BitLP64()) {
-          // auto PtrVT = getPointerTy(DAG.getDataLayout(), X86AS::FS);
+        if (Subtarget.isTarget64BitLP64())
           return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT, UseTLSDESC);
-        }
         return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT, UseTLSDESC);
       }
       return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT, UseTLSDESC);
diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll
index ed363df6f78a43..7df1a5a09ce1ee 100644
--- a/llvm/test/CodeGen/X86/tls-desc.ll
+++ b/llvm/test/CodeGen/X86/tls-desc.ll
@@ -1,46 +1,105 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc < %s -mtriple=i686-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=i686 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X86
 ; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64 --relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=X64
 
 @x = thread_local global i32 0, align 4
 @y = internal thread_local global i32 0, align 4
 
+;; FIXME: GCC's x86-64 port assumes that FLAGS_REG and RAX are changed while all other registers are preserved.
 define ptr @f1() nounwind {
 ; X86-LABEL: f1:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $12, %esp
 ; X86-NEXT:    calll .L0$pb
 ; X86-NEXT:  .L0$pb:
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:  .Ltmp0:
 ; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx
+; X86-NEXT:    #APP
+; X86-NEXT:    #NO_APP
+; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    leal x at tlsdesc(%ebx), %eax
 ; X86-NEXT:    calll *x at tlscall(%eax)
 ; X86-NEXT:    addl %gs:0, %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movl (%esp), %ebx # 4-byte Reload
+; X86-NEXT:    #APP
+; X86-NEXT:    #NO_APP
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl
 ;
 ; X32-LABEL: f1:
 ; X32:       # %bb.0:
+; X32-NEXT:    pushq %rbp
+; X32-NEXT:    pushq %r15
+; X32-NEXT:    pushq %r14
+; X32-NEXT:    pushq %r13
+; X32-NEXT:    pushq %r12
+; X32-NEXT:    pushq %rbx
 ; X32-NEXT:    pushq %rax
+; X32-NEXT:    #APP
+; X32-NEXT:    #NO_APP
 ; X32-NEXT:    leal x at tlsdesc(%rip), %eax
 ; X32-NEXT:    callq *x at tlscall(%eax)
 ; X32-NEXT:    # kill: def $eax killed $eax def $rax
 ; X32-NEXT:    addl %fs:0, %eax
-; X32-NEXT:    popq %rcx
+; X32-NEXT:    #APP
+; X32-NEXT:    #NO_APP
+; X32-NEXT:    addl $8, %esp
+; X32-NEXT:    popq %rbx
+; X32-NEXT:    popq %r12
+; X32-NEXT:    popq %r13
+; X32-NEXT:    popq %r14
+; X32-NEXT:    popq %r15
+; X32-NEXT:    popq %rbp
 ; X32-NEXT:    retq
 ;
 ; X64-LABEL: f1:
 ; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    pushq %rbx
 ; X64-NEXT:    pushq %rax
+; X64-NEXT:    #APP
+; X64-NEXT:    #NO_APP
 ; X64-NEXT:    leaq x at tlsdesc(%rip), %rax
 ; X64-NEXT:    callq *x at tlscall(%rax)
 ; X64-NEXT:    addq %fs:0, %rax
-; X64-NEXT:    popq %rcx
+; X64-NEXT:    #APP
+; X64-NEXT:    #NO_APP
+; X64-NEXT:    addq $8, %rsp
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r13
+; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %r15
+; X64-NEXT:    popq %rbp
 ; X64-NEXT:    retq
-  %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
-  ret ptr %1
+  %a = call { i32, i32, i32, i32, i32, i32 } asm sideeffect "", "=r,=r,=r,=r,=r,=r,~{dirflag},~{fpsr},~{flags}"()
+  %b = call ptr @llvm.threadlocal.address.p0(ptr @x)
+  %a.0 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 0
+  %a.1 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 1
+  %a.2 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 2
+  %a.3 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 3
+  %a.4 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 4
+  %a.5 = extractvalue { i32, i32, i32, i32, i32, i32 } %a, 5
+  call void asm sideeffect "", "r,r,r,r,r,r,~{dirflag},~{fpsr},~{flags}"(i32 %a.0, i32 %a.1, i32 %a.2, i32 %a.3, i32 %a.4, i32 %a.5)
+  ret ptr %b
 }
 
 define i32 @f2() nounwind {

>From 10ea12723c825affa71455d2d921ea19ed4d605e Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Thu, 29 Feb 2024 15:51:54 +0800
Subject: [PATCH 3/6] Define a new node for TLSDESC

---
 .../lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 14 ----
 llvm/lib/Target/X86/X86AsmPrinter.cpp         |  2 -
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 11 ++-
 llvm/lib/Target/X86/X86ISelLowering.h         |  4 +
 llvm/lib/Target/X86/X86InstrCompiler.td       | 10 +++
 llvm/lib/Target/X86/X86InstrFragments.td      |  3 +
 llvm/lib/Target/X86/X86MCInstLower.cpp        | 23 +++--
 llvm/test/CodeGen/X86/tls-desc.ll             | 83 ++++++-------------
 8 files changed, 60 insertions(+), 90 deletions(-)

diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 1877550f8c40bb..4442b80861b61a 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -431,20 +431,6 @@ enum TOF {
   /// See 'ELF Handling for Thread-Local Storage' for more details.
   ///    SYMBOL_LABEL @TLSLDM
   MO_TLSLDM,
-  /// MO_TLSCALL - On a symbol operand this indicates that the immediate is
-  /// the index of the TLS descriptor function for the symbol. Used in both
-  /// the IA32 and x86-64 local dynamic TLS access model.
-  /// See 'RFC-TLSDESC-x86' for more details.
-  ///    SYMBOL_LABEL @TLSCALL
-  MO_TLSCALL,
-  /// MO_TLSDESC - On a symbol operand this indicates that the immediate is
-  /// the index of the TLS descriptor argument for the symbol. When this
-  /// argument is passed to a call getting from index at TLSCALL, the function will
-  /// return the offset for the symbol. Used in both the IA32 and x86-64 local
-  /// dynamic TLS access model.
-  /// See 'RFC-TLSDESC-x86' for more details.
-  ///    SYMBOL_LABEL @TLSDESC
-  MO_TLSDESC,
   /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
   /// the offset of the GOT entry with the thread-pointer offset for the
   /// symbol. Used in the x86-64 initial exec TLS access model.
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index d8e111db1cec42..3395a13545e454 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -271,8 +271,6 @@ void X86AsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
   case X86II::MO_TLSGD:     O << "@TLSGD";     break;
   case X86II::MO_TLSLD:     O << "@TLSLD";     break;
   case X86II::MO_TLSLDM:    O << "@TLSLDM";    break;
-  case X86II::MO_TLSDESC:   O << "@TLSDESC";   break;
-  case X86II::MO_TLSCALL:   O << "@TLSCALL";   break;
   case X86II::MO_GOTTPOFF:  O << "@GOTTPOFF";  break;
   case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
   case X86II::MO_TPOFF:     O << "@TPOFF";     break;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1feaad80c8e35a..437c1b432dae4d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18524,11 +18524,11 @@ static SDValue getTLSADDR(SelectionDAG &DAG, SDValue Chain,
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   SDLoc dl(GA);
   SDValue TGA = DAG.getTargetGlobalAddress(
-      GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(),
-      UseTLSDESC ? X86II::MO_TLSDESC : OperandFlags);
+      GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags);
 
-  X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
-                                           : X86ISD::TLSADDR;
+  X86ISD::NodeType CallType = UseTLSDESC     ? X86ISD::TLSDESC
+                              : LocalDynamic ? X86ISD::TLSBASEADDR
+                                             : X86ISD::TLSADDR;
 
   if (InGlue) {
     SDValue Ops[] = { Chain,  TGA, *InGlue };
@@ -33338,6 +33338,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(TLSADDR)
   NODE_NAME_CASE(TLSBASEADDR)
   NODE_NAME_CASE(TLSCALL)
+  NODE_NAME_CASE(TLSDESC)
   NODE_NAME_CASE(EH_SJLJ_SETJMP)
   NODE_NAME_CASE(EH_SJLJ_LONGJMP)
   NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH)
@@ -36118,6 +36119,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   case X86::TLS_base_addr32:
   case X86::TLS_base_addr64:
   case X86::TLS_base_addrX32:
+  case X86::TLS_desc32:
+  case X86::TLS_desc64:
     return EmitLoweredTLSAddr(MI, BB);
   case X86::INDIRECT_THUNK_CALL32:
   case X86::INDIRECT_THUNK_CALL64:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index f93c54781846bf..e0e39a6e7c905d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -295,6 +295,10 @@ namespace llvm {
     // thunk at the address from an earlier relocation.
     TLSCALL,
 
+    // Thread Local Storage. A descriptor containing pointer to
+    // code and to argument to get the TLS offset for the symbol.
+    TLSDESC,
+
     // Exception Handling helpers.
     EH_RETURN,
 
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 9f1712274bc304..04cffe43c9eb63 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -507,6 +507,16 @@ def TLS_base_addrX32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
                   Requires<[In64BitMode, NotLP64]>;
 }
 
+// TLSDESC only clobbers EAX and EFLAGS. ESP is marked as a use to prevent
+// stack-pointer assignments that appear immediately before calls from
+// potentially appearing dead.
+let Defs = [EAX, EFLAGS], usesCustomInserter = 1, Uses = [RSP, SSP] in {
+  def TLS_desc32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+                     "# TLS_desc32", [(X86tlsdesc tls32addr:$sym)]>;
+  def TLS_desc64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+                     "# TLS_desc64", [(X86tlsdesc tls64addr:$sym)]>;
+}
+
 // Darwin TLS Support
 // For i386, the address of the thunk is passed on the stack, on return the
 // address of the variable is in %eax.  %ecx is trashed during the function
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index adf527d72f5b43..f14c7200af968a 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -223,6 +223,9 @@ def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
 def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR,
                         [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
+def X86tlsdesc : SDNode<"X86ISD::TLSDESC", SDT_X86TLSADDR,
+                        [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
 def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
                         [SDNPHasChain]>;
 
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index e447e17c2d7d09..e0585e697d4d42 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -257,12 +257,6 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
   case X86II::MO_TLSLDM:
     RefKind = MCSymbolRefExpr::VK_TLSLDM;
     break;
-  case X86II::MO_TLSDESC:
-    RefKind = MCSymbolRefExpr::VK_TLSDESC;
-    break;
-  case X86II::MO_TLSCALL:
-    RefKind = MCSymbolRefExpr::VK_TLSCALL;
-    break;
   case X86II::MO_GOTTPOFF:
     RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
     break;
@@ -525,19 +519,18 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
 void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                  const MachineInstr &MI) {
   NoAutoPaddingScope NoPadScope(*OutStreamer);
-  bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 &&
-                  MI.getOpcode() != X86::TLS_base_addr32;
+  bool Is64Bits = getSubtarget().is64Bit();
   bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
-                      MI.getOpcode() == X86::TLS_base_addr64;
+                      MI.getOpcode() == X86::TLS_base_addr64 ||
+                      MI.getOpcode() == X86::TLS_desc64;
   MCContext &Ctx = OutStreamer->getContext();
-  bool isTLSDESC = MI.getOperand(3).getTargetFlags() == X86II::MO_TLSDESC;
 
   MCSymbolRefExpr::VariantKind SRVK;
   switch (MI.getOpcode()) {
   case X86::TLS_addr32:
   case X86::TLS_addr64:
   case X86::TLS_addrX32:
-    SRVK = isTLSDESC ? MCSymbolRefExpr::VK_TLSDESC : MCSymbolRefExpr::VK_TLSGD;
+    SRVK = MCSymbolRefExpr::VK_TLSGD;
     break;
   case X86::TLS_base_addr32:
     SRVK = MCSymbolRefExpr::VK_TLSLDM;
@@ -546,6 +539,10 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
   case X86::TLS_base_addrX32:
     SRVK = MCSymbolRefExpr::VK_TLSLD;
     break;
+  case X86::TLS_desc32:
+  case X86::TLS_desc64:
+    SRVK = MCSymbolRefExpr::VK_TLSDESC;
+    break;
   default:
     llvm_unreachable("unexpected opcode");
   }
@@ -561,7 +558,7 @@ void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
   bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
                 Ctx.getAsmInfo()->canRelaxRelocations();
 
-  if (isTLSDESC) {
+  if (SRVK == MCSymbolRefExpr::VK_TLSDESC) {
     const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(
         MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)),
         MCSymbolRefExpr::VK_TLSCALL, Ctx);
@@ -2190,6 +2187,8 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
   case X86::TLS_base_addr32:
   case X86::TLS_base_addr64:
   case X86::TLS_base_addrX32:
+  case X86::TLS_desc32:
+  case X86::TLS_desc64:
     return LowerTlsAddr(MCInstLowering, *MI);
 
   case X86::MOVPC32r: {
diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll
index 7df1a5a09ce1ee..303acbf5aa96f4 100644
--- a/llvm/test/CodeGen/X86/tls-desc.ll
+++ b/llvm/test/CodeGen/X86/tls-desc.ll
@@ -6,7 +6,6 @@
 @x = thread_local global i32 0, align 4
 @y = internal thread_local global i32 0, align 4
 
-;; FIXME: GCC's x86-64 port assumes that FLAGS_REG and RAX are changed while all other registers are preserved.
 define ptr @f1() nounwind {
 ; X86-LABEL: f1:
 ; X86:       # %bb.0:
@@ -14,7 +13,7 @@ define ptr @f1() nounwind {
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    pushl %eax
 ; X86-NEXT:    calll .L0$pb
 ; X86-NEXT:  .L0$pb:
 ; X86-NEXT:    popl %ebx
@@ -22,18 +21,14 @@ define ptr @f1() nounwind {
 ; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ebx
 ; X86-NEXT:    #APP
 ; X86-NEXT:    #NO_APP
-; X86-NEXT:    movl %edx, (%esp) # 4-byte Spill
-; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
 ; X86-NEXT:    leal x at tlsdesc(%ebx), %eax
 ; X86-NEXT:    calll *x at tlscall(%eax)
 ; X86-NEXT:    addl %gs:0, %eax
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-NEXT:    movl (%esp), %ebx # 4-byte Reload
 ; X86-NEXT:    #APP
 ; X86-NEXT:    #NO_APP
-; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    addl $4, %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
@@ -42,12 +37,6 @@ define ptr @f1() nounwind {
 ;
 ; X32-LABEL: f1:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushq %rbp
-; X32-NEXT:    pushq %r15
-; X32-NEXT:    pushq %r14
-; X32-NEXT:    pushq %r13
-; X32-NEXT:    pushq %r12
-; X32-NEXT:    pushq %rbx
 ; X32-NEXT:    pushq %rax
 ; X32-NEXT:    #APP
 ; X32-NEXT:    #NO_APP
@@ -57,23 +46,11 @@ define ptr @f1() nounwind {
 ; X32-NEXT:    addl %fs:0, %eax
 ; X32-NEXT:    #APP
 ; X32-NEXT:    #NO_APP
-; X32-NEXT:    addl $8, %esp
-; X32-NEXT:    popq %rbx
-; X32-NEXT:    popq %r12
-; X32-NEXT:    popq %r13
-; X32-NEXT:    popq %r14
-; X32-NEXT:    popq %r15
-; X32-NEXT:    popq %rbp
+; X32-NEXT:    popq %rcx
 ; X32-NEXT:    retq
 ;
 ; X64-LABEL: f1:
 ; X64:       # %bb.0:
-; X64-NEXT:    pushq %rbp
-; X64-NEXT:    pushq %r15
-; X64-NEXT:    pushq %r14
-; X64-NEXT:    pushq %r13
-; X64-NEXT:    pushq %r12
-; X64-NEXT:    pushq %rbx
 ; X64-NEXT:    pushq %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    #NO_APP
@@ -82,13 +59,7 @@ define ptr @f1() nounwind {
 ; X64-NEXT:    addq %fs:0, %rax
 ; X64-NEXT:    #APP
 ; X64-NEXT:    #NO_APP
-; X64-NEXT:    addq $8, %rsp
-; X64-NEXT:    popq %rbx
-; X64-NEXT:    popq %r12
-; X64-NEXT:    popq %r13
-; X64-NEXT:    popq %r14
-; X64-NEXT:    popq %r15
-; X64-NEXT:    popq %rbp
+; X64-NEXT:    popq %rcx
 ; X64-NEXT:    retq
   %a = call { i32, i32, i32, i32, i32, i32 } asm sideeffect "", "=r,=r,=r,=r,=r,=r,~{dirflag},~{fpsr},~{flags}"()
   %b = call ptr @llvm.threadlocal.address.p0(ptr @x)
@@ -106,38 +77,36 @@ define i32 @f2() nounwind {
 ; X86-LABEL: f2:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    calll .L1$pb
 ; X86-NEXT:  .L1$pb:
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:  .Ltmp1:
 ; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.L1$pb), %ebx
-; X86-NEXT:    movl %gs:0, %esi
+; X86-NEXT:    movl %gs:0, %ecx
 ; X86-NEXT:    leal x at tlsdesc(%ebx), %eax
 ; X86-NEXT:    calll *x at tlscall(%eax)
-; X86-NEXT:    movl (%eax,%esi), %eax
-; X86-NEXT:    popl %esi
+; X86-NEXT:    movl (%eax,%ecx), %eax
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    retl
 ;
 ; X32-LABEL: f2:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %fs:0, %ebx
+; X32-NEXT:    pushq %rax
+; X32-NEXT:    movl %fs:0, %ecx
 ; X32-NEXT:    leal x at tlsdesc(%rip), %eax
 ; X32-NEXT:    callq *x at tlscall(%eax)
-; X32-NEXT:    movl (%eax,%ebx), %eax
-; X32-NEXT:    popq %rbx
+; X32-NEXT:    movl (%eax,%ecx), %eax
+; X32-NEXT:    popq %rcx
 ; X32-NEXT:    retq
 ;
 ; X64-LABEL: f2:
 ; X64:       # %bb.0:
-; X64-NEXT:    pushq %rbx
-; X64-NEXT:    movq %fs:0, %rbx
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movq %fs:0, %rcx
 ; X64-NEXT:    leaq x at tlsdesc(%rip), %rax
 ; X64-NEXT:    callq *x at tlscall(%rax)
-; X64-NEXT:    movl (%rax,%rbx), %eax
-; X64-NEXT:    popq %rbx
+; X64-NEXT:    movl (%rax,%rcx), %eax
+; X64-NEXT:    popq %rcx
 ; X64-NEXT:    retq
   %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
   %2 = load i32, ptr %1
@@ -185,38 +154,36 @@ define i32 @f4() nounwind {
 ; X86-LABEL: f4:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    calll .L3$pb
 ; X86-NEXT:  .L3$pb:
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:  .Ltmp3:
 ; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L3$pb), %ebx
-; X86-NEXT:    movl %gs:0, %esi
+; X86-NEXT:    movl %gs:0, %ecx
 ; X86-NEXT:    leal x at tlsdesc(%ebx), %eax
 ; X86-NEXT:    calll *x at tlscall(%eax)
-; X86-NEXT:    movl (%eax,%esi), %eax
-; X86-NEXT:    popl %esi
+; X86-NEXT:    movl (%eax,%ecx), %eax
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    retl
 ;
 ; X32-LABEL: f4:
 ; X32:       # %bb.0:
-; X32-NEXT:    pushq %rbx
-; X32-NEXT:    movl %fs:0, %ebx
+; X32-NEXT:    pushq %rax
+; X32-NEXT:    movl %fs:0, %ecx
 ; X32-NEXT:    leal x at tlsdesc(%rip), %eax
 ; X32-NEXT:    callq *x at tlscall(%eax)
-; X32-NEXT:    movl (%eax,%ebx), %eax
-; X32-NEXT:    popq %rbx
+; X32-NEXT:    movl (%eax,%ecx), %eax
+; X32-NEXT:    popq %rcx
 ; X32-NEXT:    retq
 ;
 ; X64-LABEL: f4:
 ; X64:       # %bb.0:
-; X64-NEXT:    pushq %rbx
-; X64-NEXT:    movq %fs:0, %rbx
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    movq %fs:0, %rcx
 ; X64-NEXT:    leaq x at tlsdesc(%rip), %rax
 ; X64-NEXT:    callq *x at tlscall(%rax)
-; X64-NEXT:    movl (%rax,%rbx), %eax
-; X64-NEXT:    popq %rbx
+; X64-NEXT:    movl (%rax,%rcx), %eax
+; X64-NEXT:    popq %rcx
 ; X64-NEXT:    retq
   %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
   %2 = load i32, ptr %1

>From 22f8bff277204fe8c37e7ea732e593b2ca5abdd7 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Mon, 4 Mar 2024 21:58:28 +0800
Subject: [PATCH 4/6] Add one more internal thread_local

---
 llvm/test/CodeGen/X86/tls-desc.ll | 45 ++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 15 deletions(-)

diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll
index 303acbf5aa96f4..6fc4ffdccf6901 100644
--- a/llvm/test/CodeGen/X86/tls-desc.ll
+++ b/llvm/test/CodeGen/X86/tls-desc.ll
@@ -5,6 +5,7 @@
 
 @x = thread_local global i32 0, align 4
 @y = internal thread_local global i32 0, align 4
+ at z = internal thread_local global i32 1, align 4
 
 define ptr @f1() nounwind {
 ; X86-LABEL: f1:
@@ -159,33 +160,47 @@ define i32 @f4() nounwind {
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:  .Ltmp3:
 ; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L3$pb), %ebx
-; X86-NEXT:    movl %gs:0, %ecx
-; X86-NEXT:    leal x at tlsdesc(%ebx), %eax
-; X86-NEXT:    calll *x at tlscall(%eax)
-; X86-NEXT:    movl (%eax,%ecx), %eax
+; X86-NEXT:    leal y at tlsdesc(%ebx), %eax
+; X86-NEXT:    calll *y at tlscall(%eax)
+; X86-NEXT:    movl %gs:0, %edx
+; X86-NEXT:    movl (%eax,%edx), %ecx
+; X86-NEXT:    leal z at tlsdesc(%ebx), %eax
+; X86-NEXT:    calll *z at tlscall(%eax)
+; X86-NEXT:    addl (%eax,%edx), %ecx
+; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    retl
 ;
 ; X32-LABEL: f4:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rax
-; X32-NEXT:    movl %fs:0, %ecx
-; X32-NEXT:    leal x at tlsdesc(%rip), %eax
-; X32-NEXT:    callq *x at tlscall(%eax)
-; X32-NEXT:    movl (%eax,%ecx), %eax
+; X32-NEXT:    leal y at tlsdesc(%rip), %eax
+; X32-NEXT:    callq *y at tlscall(%eax)
+; X32-NEXT:    movl %fs:0, %edx
+; X32-NEXT:    movl (%eax,%edx), %ecx
+; X32-NEXT:    leal z at tlsdesc(%rip), %eax
+; X32-NEXT:    callq *z at tlscall(%eax)
+; X32-NEXT:    addl (%eax,%edx), %ecx
+; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    popq %rcx
 ; X32-NEXT:    retq
 ;
 ; X64-LABEL: f4:
 ; X64:       # %bb.0:
 ; X64-NEXT:    pushq %rax
-; X64-NEXT:    movq %fs:0, %rcx
-; X64-NEXT:    leaq x at tlsdesc(%rip), %rax
-; X64-NEXT:    callq *x at tlscall(%rax)
-; X64-NEXT:    movl (%rax,%rcx), %eax
+; X64-NEXT:    leaq y at tlsdesc(%rip), %rax
+; X64-NEXT:    callq *y at tlscall(%rax)
+; X64-NEXT:    # kill: def $rax killed $rax killed $eax
+; X64-NEXT:    movq %fs:0, %rdx
+; X64-NEXT:    movl (%rax,%rdx), %ecx
+; X64-NEXT:    leaq z at tlsdesc(%rip), %rax
+; X64-NEXT:    callq *z at tlscall(%rax)
+; X64-NEXT:    addl (%rax,%rdx), %ecx
+; X64-NEXT:    movl %ecx, %eax
 ; X64-NEXT:    popq %rcx
 ; X64-NEXT:    retq
-  %1 = tail call ptr @llvm.threadlocal.address.p0(ptr @x)
-  %2 = load i32, ptr %1
-  ret i32 %2
+  %1 = load i32, ptr @y, align 4
+  %2 = load i32, ptr @z, align 4
+  %3 = add nsw i32 %1, %2
+  ret i32 %3
 }

>From c2b992002f9f9f19d2b4d6f78b9c35aedbbffab3 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Wed, 6 Mar 2024 18:32:23 +0800
Subject: [PATCH 5/6] User _TLS_MODULE_BASE_ for local dynamic model

---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 16 +++++++++----
 llvm/lib/Target/X86/X86ISelLowering.cpp | 28 +++++++++++++---------
 llvm/test/CodeGen/X86/tls-desc.ll       | 31 ++++++++++---------------
 llvm/test/CodeGen/X86/tls-desc.s        |  0
 4 files changed, 40 insertions(+), 35 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/tls-desc.s

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index c8f80ced354538..1db37f83e5eccc 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -3088,13 +3088,19 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
                                         SDValue &Scale, SDValue &Index,
                                         SDValue &Disp, SDValue &Segment) {
-  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
-  auto *GA = cast<GlobalAddressSDNode>(N);
+  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress ||
+         N.getOpcode() == ISD::TargetExternalSymbol);
 
   X86ISelAddressMode AM;
-  AM.GV = GA->getGlobal();
-  AM.Disp += GA->getOffset();
-  AM.SymbolFlags = GA->getTargetFlags();
+  if (auto *GA = dyn_cast<GlobalAddressSDNode>(N)) {
+    AM.GV = GA->getGlobal();
+    AM.Disp += GA->getOffset();
+    AM.SymbolFlags = GA->getTargetFlags();
+  } else {
+    auto *SA = cast<ExternalSymbolSDNode>(N);
+    AM.ES = SA->getSymbol();
+    AM.SymbolFlags = SA->getTargetFlags();
+  }
 
   if (Subtarget->is32Bit()) {
     AM.Scale = 1;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 437c1b432dae4d..9c5f24c9a31c37 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18523,8 +18523,16 @@ static SDValue getTLSADDR(SelectionDAG &DAG, SDValue Chain,
   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   SDLoc dl(GA);
-  SDValue TGA = DAG.getTargetGlobalAddress(
-      GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags);
+  SDValue TGA;
+  if (LocalDynamic && UseTLSDESC) {
+    TGA = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, OperandFlags);
+    auto UI = TGA->use_begin();
+    if (UI != TGA->use_end())
+      return SDValue(*UI->use_begin()->use_begin(), 0);
+  } else {
+    TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
+                                     GA->getOffset(), OperandFlags);
+  }
 
   X86ISD::NodeType CallType = UseTLSDESC     ? X86ISD::TLSDESC
                               : LocalDynamic ? X86ISD::TLSBASEADDR
@@ -18595,7 +18603,8 @@ static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA,
 
 static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
                                            SelectionDAG &DAG, const EVT PtrVT,
-                                           bool Is64Bit, bool Is64BitLP64) {
+                                           bool Is64Bit, bool Is64BitLP64,
+                                           bool UseTLSDESC) {
   SDLoc dl(GA);
 
   // Get the start address of the TLS block for this module.
@@ -18607,16 +18616,14 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
   if (Is64Bit) {
     unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
     Base = getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
-                      X86II::MO_TLSLD, /*UseTLSDESC=*/false,
-                      /*LocalDynamic=*/true);
+                      X86II::MO_TLSLD, UseTLSDESC, /*LocalDynamic=*/true);
   } else {
     SDValue InGlue;
     SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
         DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue);
     InGlue = Chain.getValue(1);
     Base = getTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
-                      X86II::MO_TLSLDM, /*UseTLSDESC=*/false,
-                      /*LocalDynamic=*/true);
+                      X86II::MO_TLSLDM, UseTLSDESC, /*LocalDynamic=*/true);
   }
 
   // Note: the CleanupLocalDynamicTLSPass will remove redundant computations
@@ -18706,10 +18713,9 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
     bool UseTLSDESC = DAG.getTarget().useTLSDESC();
     switch (model) {
     case TLSModel::LocalDynamic:
-      if (!UseTLSDESC)
-        return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(),
-                                           Subtarget.isTarget64BitLP64());
-      [[fallthrough]];
+      return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(),
+                                         Subtarget.isTarget64BitLP64(),
+                                         UseTLSDESC);
     case TLSModel::GeneralDynamic:
       if (Subtarget.is64Bit()) {
         if (Subtarget.isTarget64BitLP64())
diff --git a/llvm/test/CodeGen/X86/tls-desc.ll b/llvm/test/CodeGen/X86/tls-desc.ll
index 6fc4ffdccf6901..f4b9367ca32494 100644
--- a/llvm/test/CodeGen/X86/tls-desc.ll
+++ b/llvm/test/CodeGen/X86/tls-desc.ll
@@ -160,13 +160,11 @@ define i32 @f4() nounwind {
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:  .Ltmp3:
 ; X86-NEXT:    addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp3-.L3$pb), %ebx
-; X86-NEXT:    leal y at tlsdesc(%ebx), %eax
-; X86-NEXT:    calll *y at tlscall(%eax)
 ; X86-NEXT:    movl %gs:0, %edx
-; X86-NEXT:    movl (%eax,%edx), %ecx
-; X86-NEXT:    leal z at tlsdesc(%ebx), %eax
-; X86-NEXT:    calll *z at tlscall(%eax)
-; X86-NEXT:    addl (%eax,%edx), %ecx
+; X86-NEXT:    leal _TLS_MODULE_BASE_ at tlsdesc(%ebx), %eax
+; X86-NEXT:    calll *_TLS_MODULE_BASE_ at tlscall(%eax)
+; X86-NEXT:    movl y at DTPOFF(%eax,%edx), %ecx
+; X86-NEXT:    addl z at DTPOFF(%eax,%edx), %ecx
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    retl
@@ -174,13 +172,11 @@ define i32 @f4() nounwind {
 ; X32-LABEL: f4:
 ; X32:       # %bb.0:
 ; X32-NEXT:    pushq %rax
-; X32-NEXT:    leal y at tlsdesc(%rip), %eax
-; X32-NEXT:    callq *y at tlscall(%eax)
 ; X32-NEXT:    movl %fs:0, %edx
-; X32-NEXT:    movl (%eax,%edx), %ecx
-; X32-NEXT:    leal z at tlsdesc(%rip), %eax
-; X32-NEXT:    callq *z at tlscall(%eax)
-; X32-NEXT:    addl (%eax,%edx), %ecx
+; X32-NEXT:    leal _TLS_MODULE_BASE_ at tlsdesc(%rip), %eax
+; X32-NEXT:    callq *_TLS_MODULE_BASE_ at tlscall(%eax)
+; X32-NEXT:    movl y at DTPOFF(%eax,%edx), %ecx
+; X32-NEXT:    addl z at DTPOFF(%eax,%edx), %ecx
 ; X32-NEXT:    movl %ecx, %eax
 ; X32-NEXT:    popq %rcx
 ; X32-NEXT:    retq
@@ -188,14 +184,11 @@ define i32 @f4() nounwind {
 ; X64-LABEL: f4:
 ; X64:       # %bb.0:
 ; X64-NEXT:    pushq %rax
-; X64-NEXT:    leaq y at tlsdesc(%rip), %rax
-; X64-NEXT:    callq *y at tlscall(%rax)
-; X64-NEXT:    # kill: def $rax killed $rax killed $eax
 ; X64-NEXT:    movq %fs:0, %rdx
-; X64-NEXT:    movl (%rax,%rdx), %ecx
-; X64-NEXT:    leaq z at tlsdesc(%rip), %rax
-; X64-NEXT:    callq *z at tlscall(%rax)
-; X64-NEXT:    addl (%rax,%rdx), %ecx
+; X64-NEXT:    leaq _TLS_MODULE_BASE_ at tlsdesc(%rip), %rax
+; X64-NEXT:    callq *_TLS_MODULE_BASE_ at tlscall(%rax)
+; X64-NEXT:    movl y at DTPOFF(%rax,%rdx), %ecx
+; X64-NEXT:    addl z at DTPOFF(%rax,%rdx), %ecx
 ; X64-NEXT:    movl %ecx, %eax
 ; X64-NEXT:    popq %rcx
 ; X64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/tls-desc.s b/llvm/test/CodeGen/X86/tls-desc.s
new file mode 100644
index 00000000000000..e69de29bb2d1d6

>From 35d5350c72bc234c3b24cc3152b40a723854a4a4 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Wed, 6 Mar 2024 18:51:37 +0800
Subject: [PATCH 6/6] Revert change to signatures of GetTLSADDR

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 63 +++++++++++--------------
 llvm/test/CodeGen/X86/tls-desc.s        |  0
 2 files changed, 28 insertions(+), 35 deletions(-)
 delete mode 100644 llvm/test/CodeGen/X86/tls-desc.s

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9c5f24c9a31c37..0a54ff9654326a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18515,15 +18515,15 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
   return LowerGlobalOrExternal(Op, DAG, /*ForCall=*/false);
 }
 
-static SDValue getTLSADDR(SelectionDAG &DAG, SDValue Chain,
-                          GlobalAddressSDNode *GA, SDValue *InGlue,
-                          const EVT PtrVT, unsigned ReturnReg,
-                          unsigned char OperandFlags, bool UseTLSDESC = false,
-                          bool LocalDynamic = false) {
+static SDValue
+GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
+           SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg,
+           unsigned char OperandFlags, bool LocalDynamic = false) {
   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   SDLoc dl(GA);
   SDValue TGA;
+  bool UseTLSDESC = DAG.getTarget().useTLSDESC();
   if (LocalDynamic && UseTLSDESC) {
     TGA = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, OperandFlags);
     auto UI = TGA->use_begin();
@@ -18568,10 +18568,9 @@ static SDValue getTLSADDR(SelectionDAG &DAG, SDValue Chain,
 }
 
 // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
-static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA,
-                                               SelectionDAG &DAG,
-                                               const EVT PtrVT,
-                                               bool UseTLSDESC) {
+static SDValue
+LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+                                const EVT PtrVT) {
   SDValue InGlue;
   SDLoc dl(GA);  // ? function entry point might be better
   SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
@@ -18579,32 +18578,28 @@ static SDValue LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA,
                                                SDLoc(), PtrVT), InGlue);
   InGlue = Chain.getValue(1);
 
-  return getTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD,
-                    UseTLSDESC);
+  return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD);
 }
 
 // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64
-static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA,
-                                               SelectionDAG &DAG,
-                                               const EVT PtrVT,
-                                               bool UseTLSDESC) {
-  return getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX,
-                    X86II::MO_TLSGD, UseTLSDESC);
+static SDValue
+LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+                                const EVT PtrVT) {
+  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
+                    X86::RAX, X86II::MO_TLSGD);
 }
 
 // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit ILP32
-static SDValue LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA,
-                                                SelectionDAG &DAG,
-                                                const EVT PtrVT,
-                                                bool UseTLSDESC) {
-  return getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::EAX,
-                    X86II::MO_TLSGD, UseTLSDESC);
+static SDValue
+LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+                                 const EVT PtrVT) {
+  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
+                    X86::EAX, X86II::MO_TLSGD);
 }
 
 static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
                                            SelectionDAG &DAG, const EVT PtrVT,
-                                           bool Is64Bit, bool Is64BitLP64,
-                                           bool UseTLSDESC) {
+                                           bool Is64Bit, bool Is64BitLP64) {
   SDLoc dl(GA);
 
   // Get the start address of the TLS block for this module.
@@ -18615,15 +18610,15 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
   SDValue Base;
   if (Is64Bit) {
     unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
-    Base = getTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
-                      X86II::MO_TLSLD, UseTLSDESC, /*LocalDynamic=*/true);
+    Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
+                      X86II::MO_TLSLD, /*LocalDynamic=*/true);
   } else {
     SDValue InGlue;
     SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
         DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue);
     InGlue = Chain.getValue(1);
-    Base = getTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
-                      X86II::MO_TLSLDM, UseTLSDESC, /*LocalDynamic=*/true);
+    Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
+                      X86II::MO_TLSLDM, /*LocalDynamic=*/true);
   }
 
   // Note: the CleanupLocalDynamicTLSPass will remove redundant computations
@@ -18710,19 +18705,17 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
 
   if (Subtarget.isTargetELF()) {
     TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
-    bool UseTLSDESC = DAG.getTarget().useTLSDESC();
     switch (model) {
     case TLSModel::LocalDynamic:
       return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(),
-                                         Subtarget.isTarget64BitLP64(),
-                                         UseTLSDESC);
+                                         Subtarget.isTarget64BitLP64());
     case TLSModel::GeneralDynamic:
       if (Subtarget.is64Bit()) {
         if (Subtarget.isTarget64BitLP64())
-          return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT, UseTLSDESC);
-        return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT, UseTLSDESC);
+          return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
+        return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT);
       }
-      return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT, UseTLSDESC);
+      return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
     case TLSModel::InitialExec:
     case TLSModel::LocalExec:
       return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(),
diff --git a/llvm/test/CodeGen/X86/tls-desc.s b/llvm/test/CodeGen/X86/tls-desc.s
deleted file mode 100644
index e69de29bb2d1d6..00000000000000



More information about the cfe-commits mailing list