[llvm] bc15bf6 - [X86] matchAdd: don't fold a large offset into a %rip relative address

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 28 22:31:33 PST 2020


Author: Fangrui Song
Date: 2020-01-28T22:30:52-08:00
New Revision: bc15bf66dcca76cc06fe71fca35b74dc4d521021

URL: https://github.com/llvm/llvm-project/commit/bc15bf66dcca76cc06fe71fca35b74dc4d521021
DIFF: https://github.com/llvm/llvm-project/commit/bc15bf66dcca76cc06fe71fca35b74dc4d521021.diff

LOG: [X86] matchAdd: don't fold a large offset into a %rip relative address

For `ret i64 add (i64 ptrtoint (i32* @foo to i64), i64 1701208431)`,

```
X86DAGToDAGISel::matchAdd
  ...
// AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
// Try folding offset but fail; there is a symbolic displacement, so offset cannot be too large
      !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
    return false;
  ...
  // Try again after commuting the operands.
// AM.Disp = Val; foldOffsetIntoAddress() does not know there will be a symbolic displacement
  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
// AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
      !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
// Succeeded! Produced leaq sym+disp(%rip),...
    return false;
```

`foldOffsetIntoAddress()` currently does not know there is a symbolic
displacement and can fold a large offset.

The produced `leaq sym+disp(%rip), %rax` instruction is relocated by
an R_X86_64_PC32. If disp is large and sym+disp-rip>=2**31, there
will be a relocation overflow.

This approach is still not elegant. Unfortunately the isRIPRelative
interface is a bit clumsy. I tried several solutions and eventually
picked this one.

Differential Revision: https://reviews.llvm.org/D73606

Added: 
    llvm/test/CodeGen/X86/fold-add-pcrel.ll

Modified: 
    llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index bf33f399db28..ebaa7cf2c239 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1581,12 +1581,24 @@ bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM,
   if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
       !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
     return false;
-  AM = Backup;
 
-  // Try again after commuting the operands.
-  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
-      !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
-    return false;
+  // Don't try commuting operands if the address is in the form of
+  // sym+disp(%rip). foldOffsetIntoAddress() currently does not know there is a
+  // symbolic displacement and would fold disp. If disp is just a bit smaller
+  // than 2**31, it can easily cause a relocation overflow.
+  bool NoCommutate = false;
+  if (AM.isRIPRelative() && AM.hasSymbolicDisplacement())
+    if (ConstantSDNode *Cst =
+            dyn_cast<ConstantSDNode>(Handle.getValue().getOperand(1)))
+      NoCommutate = Cst->getSExtValue() != 0;
+
+  AM = Backup;
+  if (!NoCommutate) {
+    // Try again after commuting the operands.
+    if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth + 1) &&
+        !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth + 1))
+      return false;
+  }
   AM = Backup;
 
   // If we couldn't fold both operands into the address at the same time,

diff  --git a/llvm/test/CodeGen/X86/fold-add-pcrel.ll b/llvm/test/CodeGen/X86/fold-add-pcrel.ll
new file mode 100644
index 000000000000..745a641f70ff
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fold-add-pcrel.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=x86_64 -relocation-model=static < %s | FileCheck --check-prefixes=CHECK,STATIC %s
+; RUN: llc -mtriple=x86_64 -relocation-model=pic < %s | FileCheck --check-prefixes=CHECK,PIC %s
+; RUN: llc -mtriple=x86_64 -code-model=medium -relocation-model=static < %s | FileCheck --check-prefixes=CHECK,MSTATIC %s
+; RUN: llc -mtriple=x86_64 -code-model=medium -relocation-model=pic < %s | FileCheck --check-prefixes=CHECK,MPIC %s
+
+ at foo = dso_local global i32 0
+
+define dso_local i64 @zero() {
+; CHECK-LABEL: zero:
+; CHECK:       # %bb.0:
+; STATIC-NEXT:   movl $foo, %eax
+; STATIC-NEXT:   retq
+; PIC-NEXT:      leaq foo(%rip), %rax
+; PIC-NEXT:      retq
+; MSTATIC-NEXT:  movabsq $foo, %rax
+; MSTATIC-NEXT:  retq
+; MPIC-NEXT:     leaq _GLOBAL_OFFSET_TABLE_(%rip), %rcx
+; MPIC-NEXT:     movabsq $foo at GOTOFF, %rax
+; MPIC-NEXT:     addq %rcx, %rax
+entry:
+  ret i64 add (i64 ptrtoint (i32* @foo to i64), i64 0)
+}
+
+;; Check we don't fold a large offset into leaq, otherwise
+;; the large r_addend can easily cause a relocation overflow.
+define dso_local i64 @large() {
+; CHECK-LABEL: large:
+; CHECK:       # %bb.0:
+; STATIC-NEXT:   movl $1701208431, %eax
+; STATIC-NEXT:   leaq foo(%rax), %rax
+; PIC-NEXT:      leaq foo(%rip), %rax
+; PIC-NEXT:      addq $1701208431, %rax
+; MSTATIC-NEXT:  movabsq $foo, %rax
+; MSTATIC-NEXT:  addq $1701208431, %rax
+; MSTATIC-NEXT:  retq
+; MPIC-NEXT:     leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax
+; MPIC-NEXT:     movabsq $foo at GOTOFF, %rcx
+; MPIC-NEXT:     leaq 1701208431(%rax,%rcx), %rax
+entry:
+  ret i64 add (i64 ptrtoint (i32* @foo to i64), i64 1701208431)
+}


        


More information about the llvm-commits mailing list