[llvm-branch-commits] [llvm] 09d0e7a - [X86] Avoid %fs:(%eax) references in x32 mode

Wed Dec 16 14:45:20 PST 2020

Author: Harald van Dijk
Date: 2020-12-16T22:39:57Z
New Revision: 09d0e7a7c153820f66597ac431d4453e272f204e

URL: https://github.com/llvm/llvm-project/commit/09d0e7a7c153820f66597ac431d4453e272f204e
DIFF: https://github.com/llvm/llvm-project/commit/09d0e7a7c153820f66597ac431d4453e272f204e.diff

LOG: [X86] Avoid %fs:(%eax) references in x32 mode

The ABI explains that %fs:(%eax) zero-extends %eax to 64 bits and adds the
result to the TLS base address. However, the TLS base address need not be
at the start of the TLS block, so TLS references may use negative offsets,
which zero-extension of a 32-bit register would turn into large positive ones.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D93158

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/test/CodeGen/X86/pic.ll
    llvm/test/CodeGen/X86/tls-pie.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5d197e4d5f76..d7c8e88640af 100644

--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -207,7 +207,8 @@ namespace {
     void Select(SDNode *N) override;
 
     bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
-    bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
+    bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
+                            bool AllowSegmentRegForX32 = false);
     bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
     bool matchAddress(SDValue N, X86ISelAddressMode &AM);
     bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
@@ -1613,20 +1614,26 @@ bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
 
 }
 
-bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
+bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
+                                         bool AllowSegmentRegForX32) {
   SDValue Address = N->getOperand(1);
 
   // load gs:0 -> GS segment register.
   // load fs:0 -> FS segment register.
   //
-  // This optimization is valid because the GNU TLS model defines that
-  // gs:0 (or fs:0 on X86-64) contains its own address.
+  // This optimization is generally valid because the GNU TLS model defines that
+  // gs:0 (or fs:0 on X86-64) contains its own address. However, for X86-64 mode
+  // with 32-bit registers, as we get in ILP32 mode, those registers are first
+  // zero-extended to 64 bits and then added to the base address, which gives
+  // unwanted results when the register holds a negative value.
   // For more information see http://people.redhat.com/drepper/tls.pdf
-  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address)) {
     if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
         !IndirectTlsSegRefs &&
         (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
-         Subtarget->isTargetFuchsia()))
+         Subtarget->isTargetFuchsia())) {
+      if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32)
+        return true;
       switch (N->getPointerInfo().getAddrSpace()) {
       case X86AS::GS:
         AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
@@ -1637,6 +1644,8 @@ bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
       // Address space X86AS::SS is not handled here, because it is not used to
       // address TLS areas.
       }
+    }
+  }
 
   return true;
 }
@@ -1720,6 +1729,21 @@ bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
   if (matchAddressRecursively(N, AM, 0))
     return true;
 
+  // Post-processing: Make a second attempt to fold a load, if we now know
+  // that there will not be any other register. This is only performed for
+  // 64-bit ILP32 mode since 32-bit mode and 64-bit LP64 mode will have folded
+  // any foldable load the first time.
+  if (Subtarget->isTarget64BitILP32() &&
+      AM.BaseType == X86ISelAddressMode::RegBase &&
+      AM.Base_Reg.getNode() != nullptr && AM.IndexReg.getNode() == nullptr) {
+    SDValue Save_Base_Reg = AM.Base_Reg;
+    if (auto *LoadN = dyn_cast<LoadSDNode>(Save_Base_Reg)) {
+      AM.Base_Reg = SDValue();
+      if (matchLoadInAddress(LoadN, AM, /*AllowSegmentRegForX32=*/true))
+        AM.Base_Reg = Save_Base_Reg;
+    }
+  }
+
   // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
   // a smaller encoding and avoids a scaled-index.
   if (AM.Scale == 2 &&

diff  --git a/llvm/test/CodeGen/X86/pic.ll b/llvm/test/CodeGen/X86/pic.ll
index 101c749633bc..b7d63dce8626 100644
--- a/llvm/test/CodeGen/X86/pic.ll
+++ b/llvm/test/CodeGen/X86/pic.ll
@@ -336,17 +336,18 @@ entry:
 ; CHECK-I686-DAG:	movl	%gs:0,
 ; CHECK-X32-DAG:	movl	tlsdstie@GOTTPOFF(%rip),
 ; CHECK-X32-DAG:	movl	%fs:0,
-; CHECK:	addl
+; CHECK-I686:	addl
+; CHECK-X32:	leal	({{%.*,%.*}}),
 ; CHECK-I686:	movl	tlsptrie@GOTNTPOFF(
 ; CHECK-X32:	movl	tlsptrie@GOTTPOFF(%rip),
 ; CHECK-I686:	movl	{{%.*}}, %gs:(
-; CHECK-X32:	movl	{{%.*}}, %fs:(
+; CHECK-X32:	movl	{{%.*}}, ({{%.*,%.*}})
 ; CHECK-I686:	movl	tlssrcie@GOTNTPOFF(
 ; CHECK-X32:	movl	tlssrcie@GOTTPOFF(%rip),
 ; CHECK-I686:	movl	%gs:(
-; CHECK-X32:	movl	%fs:(
+; CHECK-X32:	movl	({{%.*,%.*}}),
 ; CHECK-I686:	movl	{{%.*}}, %gs:(
-; CHECK-X32:	movl	{{%.*}}, %fs:(
+; CHECK-X32:	movl	{{%.*}}, ({{%.*,%.*}})
 ; CHECK-I686:	ret
 ; CHECK-X32:	retq
 }

diff  --git a/llvm/test/CodeGen/X86/tls-pie.ll b/llvm/test/CodeGen/X86/tls-pie.ll
index 4f5c4f8fed58..854482a775d3 100644
--- a/llvm/test/CodeGen/X86/tls-pie.ll
+++ b/llvm/test/CodeGen/X86/tls-pie.ll
@@ -65,7 +65,8 @@ define i32 @f3() {
 ; X32-LABEL: f3:
 ; X32:       # %bb.0: # %entry
 ; X32-NEXT:    movl i2@{{.*}}(%rip), %eax
-; X32-NEXT:    movl %fs:(%eax), %eax
+; X32-NEXT:    movl %fs:0, %ecx
+; X32-NEXT:    movl (%ecx,%eax), %eax
 ; X32-NEXT:    retq
 ;
 ; X64-LABEL: f3: