[Openmp-commits] [compiler-rt] [libcxx] [mlir] [libc] [flang] [llvm] [clang] [openmp] [clang-tools-extra] [libunwind] [X86][ISel] Don't select MOV/ADD64ri32 for tglobaltlsaddr under large code models (PR #77175)

Nicholas Mosier via Openmp-commits openmp-commits at lists.llvm.org
Fri Jan 5 20:13:41 PST 2024


https://github.com/nmosier updated https://github.com/llvm/llvm-project/pull/77175

>From b6351e796868be27075518fb51830bf79c60cdf6 Mon Sep 17 00:00:00 2001
From: Nicholas Mosier <nmosier at stanford.edu>
Date: Sat, 6 Jan 2024 03:36:03 +0000
Subject: [PATCH] [X86][ISel] Select MOV/ADD64ri32 for tglobaltlsaddr only
 under small code models

This patch fixes a bug (GitHub issue #77128) that caused the compiler
to emit 32-bit (rather than 64-bit) relocations for TLS offsets under a
medium/large code model for x86-64 targets, resulting in link-time errors.

The root cause of the bug is an X86 instruction selection pattern that
errantly matches tglobaltlsaddr SDNodes to target MOV64ri32/ADD64ri32 SDNodes
during the Select phase of instruction selection, regardless of the code
model.

This patch adds the requirement `Requires<[NearData]>` to both X86 selection
patterns, ensuring that they only match when the code model is tiny/small/kernel.
It also adds a new test (llvm/test/CodeGen/X86/tls-codemodels.ll) to ensure the
patterns are not matched for medium/large code models.
---
 llvm/lib/Target/X86/X86InstrCompiler.td |  4 +--
 llvm/test/CodeGen/X86/tls-codemodels.ll | 36 +++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/tls-codemodels.ll

diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index c77c77ee4a3eeb..67fb593e391d34 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1267,10 +1267,10 @@ def : Pat<(i64 (X86RecoverFrameAlloc mcsym:$dst)), (MOV64ri mcsym:$dst)>;
 // tls has some funny stuff here...
 // This corresponds to movabs $foo@tpoff, %rax
 def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
-          (MOV64ri32 tglobaltlsaddr :$dst)>;
+          (MOV64ri32 tglobaltlsaddr :$dst)>, Requires<[NearData]>;
 // This corresponds to add $foo@tpoff, %rax
 def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
-          (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
+          (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>, Requires<[NearData]>;
 
 
 // Direct PC relative function call for small code model. 32-bit displacement
diff --git a/llvm/test/CodeGen/X86/tls-codemodels.ll b/llvm/test/CodeGen/X86/tls-codemodels.ll
new file mode 100644
index 00000000000000..644e07300f3928
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tls-codemodels.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -code-model=small | FileCheck %s --check-prefix=CHECK-SMALL
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -code-model=kernel | FileCheck %s --check-prefix=CHECK-KERNEL
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -code-model=medium | FileCheck %s --check-prefix=CHECK-MEDIUM
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -code-model=large | FileCheck %s --check-prefix=CHECK-LARGE
+
+@x = dso_local thread_local global i32 0, align 4
+
+define dso_local void @test() local_unnamed_addr {
+; CHECK-SMALL-LABEL: test:
+; CHECK-SMALL:       # %bb.0: # %entry
+; CHECK-SMALL-NEXT:    movl $0, %fs:x@TPOFF
+; CHECK-SMALL-NEXT:    retq
+;
+; CHECK-KERNEL-LABEL: test:
+; CHECK-KERNEL:       # %bb.0: # %entry
+; CHECK-KERNEL-NEXT:    movl $0, %fs:x@TPOFF
+; CHECK-KERNEL-NEXT:    retq
+;
+; CHECK-MEDIUM-LABEL: test:
+; CHECK-MEDIUM:       # %bb.0: # %entry
+; CHECK-MEDIUM-NEXT:    movl $0, %fs:x@TPOFF
+; CHECK-MEDIUM-NEXT:    retq
+;
+; CHECK-LARGE-LABEL: test:
+; CHECK-LARGE:       # %bb.0: # %entry
+; CHECK-LARGE-NEXT:    movabsq $x@TPOFF, %rax
+; CHECK-LARGE-NEXT:    movl $0, %fs:(%rax)
+; CHECK-LARGE-NEXT:    retq
+entry:
+  %0 = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x)
+  store i32 0, ptr %0, align 4
+  ret void
+}
+
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)



More information about the Openmp-commits mailing list