[llvm] CodeGenPrepare: Add support for llvm.threadlocal.address address-mode sinking (PR #87844)

Matthias Braun via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 11 13:54:02 PDT 2024


https://github.com/MatzeB updated https://github.com/llvm/llvm-project/pull/87844

>From 60d49fd3271a9e44723fde0e149fb525911084b0 Mon Sep 17 00:00:00 2001
From: Matthias Braun <matze at braunis.de>
Date: Fri, 5 Apr 2024 14:28:21 -0700
Subject: [PATCH 1/2] Add test for TLS handling change

---
 .../X86/codegen-prepare-addrmode-tls.ll       | 186 ++++++++++++++++++
 1 file changed, 186 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/codegen-prepare-addrmode-tls.ll

diff --git a/llvm/test/CodeGen/X86/codegen-prepare-addrmode-tls.ll b/llvm/test/CodeGen/X86/codegen-prepare-addrmode-tls.ll
new file mode 100644
index 00000000000000..d49885c127bc6e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/codegen-prepare-addrmode-tls.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -o - %s | FileCheck %s --check-prefix=NOPIC
+; RUN: llc -o - %s -relocation-model=pic | FileCheck %s --check-prefix=PIC
+; RUN: llc -o - %s -relocation-model=pic -enable-tlsdesc | FileCheck %s --check-prefix=TLSDESC
+
+target triple = "x86_64--linux-gnu"
+
+declare void @effect()
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
+
+@foo_local = dso_local thread_local(localexec) global i32 0, align 4
+
+define i32 @func_local_tls(i32 %arg0, i32 %arg1) nounwind {
+; NOPIC-LABEL: func_local_tls:
+; NOPIC:       # %bb.0: # %entry
+; NOPIC-NEXT:    pushq %r14
+; NOPIC-NEXT:    pushq %rbx
+; NOPIC-NEXT:    pushq %rax
+; NOPIC-NEXT:    movl %fs:foo_local at TPOFF, %ebx
+; NOPIC-NEXT:    testl %edi, %edi
+; NOPIC-NEXT:    movl %ebx, %eax
+; NOPIC-NEXT:    jne .LBB0_2
+; NOPIC-NEXT:  # %bb.1: # %if.then
+; NOPIC-NEXT:    movq %fs:0, %rax
+; NOPIC-NEXT:    leaq foo_local at TPOFF(%rax), %r14
+; NOPIC-NEXT:    callq effect at PLT
+; NOPIC-NEXT:    movl (%r14), %eax
+; NOPIC-NEXT:  .LBB0_2: # %if.end
+; NOPIC-NEXT:    addl %ebx, %eax
+; NOPIC-NEXT:    addq $8, %rsp
+; NOPIC-NEXT:    popq %rbx
+; NOPIC-NEXT:    popq %r14
+; NOPIC-NEXT:    retq
+;
+; PIC-LABEL: func_local_tls:
+; PIC:       # %bb.0: # %entry
+; PIC-NEXT:    pushq %r14
+; PIC-NEXT:    pushq %rbx
+; PIC-NEXT:    pushq %rax
+; PIC-NEXT:    movl %fs:.Lfoo_local$local at TPOFF, %ebx
+; PIC-NEXT:    testl %edi, %edi
+; PIC-NEXT:    movl %ebx, %eax
+; PIC-NEXT:    jne .LBB0_2
+; PIC-NEXT:  # %bb.1: # %if.then
+; PIC-NEXT:    movq %fs:0, %rax
+; PIC-NEXT:    leaq .Lfoo_local$local at TPOFF(%rax), %r14
+; PIC-NEXT:    callq effect at PLT
+; PIC-NEXT:    movl (%r14), %eax
+; PIC-NEXT:  .LBB0_2: # %if.end
+; PIC-NEXT:    addl %ebx, %eax
+; PIC-NEXT:    addq $8, %rsp
+; PIC-NEXT:    popq %rbx
+; PIC-NEXT:    popq %r14
+; PIC-NEXT:    retq
+;
+; TLSDESC-LABEL: func_local_tls:
+; TLSDESC:       # %bb.0: # %entry
+; TLSDESC-NEXT:    pushq %r14
+; TLSDESC-NEXT:    pushq %rbx
+; TLSDESC-NEXT:    pushq %rax
+; TLSDESC-NEXT:    movl %fs:.Lfoo_local$local at TPOFF, %ebx
+; TLSDESC-NEXT:    testl %edi, %edi
+; TLSDESC-NEXT:    movl %ebx, %eax
+; TLSDESC-NEXT:    jne .LBB0_2
+; TLSDESC-NEXT:  # %bb.1: # %if.then
+; TLSDESC-NEXT:    movq %fs:0, %rax
+; TLSDESC-NEXT:    leaq .Lfoo_local$local at TPOFF(%rax), %r14
+; TLSDESC-NEXT:    callq effect at PLT
+; TLSDESC-NEXT:    movl (%r14), %eax
+; TLSDESC-NEXT:  .LBB0_2: # %if.end
+; TLSDESC-NEXT:    addl %ebx, %eax
+; TLSDESC-NEXT:    addq $8, %rsp
+; TLSDESC-NEXT:    popq %rbx
+; TLSDESC-NEXT:    popq %r14
+; TLSDESC-NEXT:    retq
+entry:
+  %addr = tail call ptr @llvm.threadlocal.address.p0(ptr @foo_local)
+  %load0 = load i32, ptr %addr, align 4
+  %cond = icmp eq i32 %arg0, 0
+  br i1 %cond, label %if.then, label %if.end
+
+if.then:
+  tail call void @effect()
+  %x = add i32 %arg1, 42
+  %addr1 = getelementptr inbounds i32, ptr %addr, i32 %x
+  %load1 = load i32, ptr %addr, align 4
+  br label %if.end
+
+if.end:
+  %phi = phi i32 [ %load1, %if.then ], [ %load0, %entry ]
+  %ret = add i32 %phi, %load0
+  ret i32 %ret
+}
+
+@foo_nonlocal = thread_local global i32 0, align 4
+
+define i32 @func_nonlocal_tls(i32 %arg0, i32 %arg1) nounwind {
+; NOPIC-LABEL: func_nonlocal_tls:
+; NOPIC:       # %bb.0: # %entry
+; NOPIC-NEXT:    pushq %r14
+; NOPIC-NEXT:    pushq %rbx
+; NOPIC-NEXT:    pushq %rax
+; NOPIC-NEXT:    movq foo_nonlocal at GOTTPOFF(%rip), %r14
+; NOPIC-NEXT:    movl %fs:(%r14), %ebx
+; NOPIC-NEXT:    testl %edi, %edi
+; NOPIC-NEXT:    movl %ebx, %eax
+; NOPIC-NEXT:    jne .LBB1_2
+; NOPIC-NEXT:  # %bb.1: # %if.then
+; NOPIC-NEXT:    addq %fs:0, %r14
+; NOPIC-NEXT:    callq effect at PLT
+; NOPIC-NEXT:    movl (%r14), %eax
+; NOPIC-NEXT:  .LBB1_2: # %if.end
+; NOPIC-NEXT:    addl %ebx, %eax
+; NOPIC-NEXT:    addq $8, %rsp
+; NOPIC-NEXT:    popq %rbx
+; NOPIC-NEXT:    popq %r14
+; NOPIC-NEXT:    retq
+;
+; PIC-LABEL: func_nonlocal_tls:
+; PIC:       # %bb.0: # %entry
+; PIC-NEXT:    pushq %rbp
+; PIC-NEXT:    pushq %r14
+; PIC-NEXT:    pushq %rbx
+; PIC-NEXT:    movl %edi, %ebp
+; PIC-NEXT:    data16
+; PIC-NEXT:    leaq foo_nonlocal at TLSGD(%rip), %rdi
+; PIC-NEXT:    data16
+; PIC-NEXT:    data16
+; PIC-NEXT:    rex64
+; PIC-NEXT:    callq __tls_get_addr at PLT
+; PIC-NEXT:    movq %rax, %rbx
+; PIC-NEXT:    movl (%rax), %r14d
+; PIC-NEXT:    testl %ebp, %ebp
+; PIC-NEXT:    movl %r14d, %eax
+; PIC-NEXT:    jne .LBB1_2
+; PIC-NEXT:  # %bb.1: # %if.then
+; PIC-NEXT:    callq effect at PLT
+; PIC-NEXT:    movl (%rbx), %eax
+; PIC-NEXT:  .LBB1_2: # %if.end
+; PIC-NEXT:    addl %r14d, %eax
+; PIC-NEXT:    popq %rbx
+; PIC-NEXT:    popq %r14
+; PIC-NEXT:    popq %rbp
+; PIC-NEXT:    retq
+;
+; TLSDESC-LABEL: func_nonlocal_tls:
+; TLSDESC:       # %bb.0: # %entry
+; TLSDESC-NEXT:    pushq %r14
+; TLSDESC-NEXT:    pushq %rbx
+; TLSDESC-NEXT:    pushq %rax
+; TLSDESC-NEXT:    leaq foo_nonlocal at tlsdesc(%rip), %rax
+; TLSDESC-NEXT:    callq *foo_nonlocal at tlscall(%rax)
+; TLSDESC-NEXT:    movl %fs:(%rax), %ebx
+; TLSDESC-NEXT:    testl %edi, %edi
+; TLSDESC-NEXT:    movl %ebx, %ecx
+; TLSDESC-NEXT:    jne .LBB1_2
+; TLSDESC-NEXT:  # %bb.1: # %if.then
+; TLSDESC-NEXT:    addq %fs:0, %rax
+; TLSDESC-NEXT:    movq %rax, %r14
+; TLSDESC-NEXT:    callq effect at PLT
+; TLSDESC-NEXT:    movl (%r14), %ecx
+; TLSDESC-NEXT:  .LBB1_2: # %if.end
+; TLSDESC-NEXT:    addl %ebx, %ecx
+; TLSDESC-NEXT:    movl %ecx, %eax
+; TLSDESC-NEXT:    addq $8, %rsp
+; TLSDESC-NEXT:    popq %rbx
+; TLSDESC-NEXT:    popq %r14
+; TLSDESC-NEXT:    retq
+entry:
+  %addr = tail call ptr @llvm.threadlocal.address.p0(ptr @foo_nonlocal)
+  %load0 = load i32, ptr %addr, align 4
+  %cond = icmp eq i32 %arg0, 0
+  br i1 %cond, label %if.then, label %if.end
+
+if.then:
+  tail call void @effect()
+  %x = add i32 %arg1, 42
+  %addr1 = getelementptr inbounds i32, ptr %addr, i32 %x
+  %load1 = load i32, ptr %addr, align 4
+  br label %if.end
+
+if.end:
+  %phi = phi i32 [ %load1, %if.then ], [ %load0, %entry ]
+  %ret = add i32 %phi, %load0
+  ret i32 %ret
+}

>From 864723f5851a6795ab3c07d9f901b7b12dcefa3a Mon Sep 17 00:00:00 2001
From: Matthias Braun <matze at braunis.de>
Date: Fri, 5 Apr 2024 11:59:23 -0700
Subject: [PATCH 2/2] CodeGenPrepare: Remove threadlocal_address intrinsic when
 cheap to recompute.

The `threadlocal_address` intrinsic is currently ignored/removed for
instruction selection by the `SelectionDAGBuilder` (see also
https://reviews.llvm.org/D125291 ).

However, being an Instruction means `SelectionDAG` will assign a register
to it and share the value across basic blocks. This sharing is
suboptimal in the "LocalExec" TLS model on x86 where it is cheaper to
just recompute the address. We saw a 0.5% regression in a codebase with
a lot of TLS usage (HHVM).

This introduces a new `addressingModeSupportsTLS` target lowering
callback and teaches the `CodeGenPrepare` addressing-mode matcher to look
through the `threadlocal_address` intrinsic, re-creating it next to the
memory instruction, to restore the efficient behavior from before the
introduction of the `threadlocal_address` intrinsic.

This fixes #87437
---
 llvm/include/llvm/CodeGen/TargetLowering.h    |  6 ++
 llvm/lib/CodeGen/CodeGenPrepare.cpp           | 18 +++-
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 24 ++++++
 llvm/lib/Target/X86/X86ISelLowering.h         |  2 +
 .../X86/codegen-prepare-addrmode-tls.ll       | 27 +-----
 .../CodeGenPrepare/X86/sink-addrmode-tls.ll   | 83 +++++++++++++++++++
 6 files changed, 135 insertions(+), 25 deletions(-)
 create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-tls.ll

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index a4dc097446186a..05b22289d6e247 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2798,6 +2798,12 @@ class TargetLoweringBase {
                                      Type *Ty, unsigned AddrSpace,
                                      Instruction *I = nullptr) const;
 
+  /// Returns true if the target's addressing mode can target thread local
+  /// storage (TLS).
+  virtual bool addressingModeSupportsTLS(const GlobalValue &) const {
+    return false;
+  }
+
   /// Return the prefered common base offset.
   virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
                                                  int64_t MaxOffset) const {
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index e657872c382848..256145e41b9f43 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5082,6 +5082,15 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
     }
     return true;
   }
+  case Instruction::Call:
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
+      if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
+        GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
+        if (TLI.addressingModeSupportsTLS(GV))
+          return matchAddr(AddrInst->getOperand(0), Depth);
+      }
+    }
+    break;
   }
   return false;
 }
@@ -5620,11 +5629,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
         return Modified;
     }
 
-    if (AddrMode.BaseGV) {
+    GlobalValue *BaseGV = AddrMode.BaseGV;
+    if (BaseGV != nullptr) {
       if (ResultPtr)
         return Modified;
 
-      ResultPtr = AddrMode.BaseGV;
+      if (BaseGV->isThreadLocal()) {
+        ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
+      } else {
+        ResultPtr = BaseGV;
+      }
     }
 
     // If the real base value actually came from an inttoptr, then the matcher
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f274da6f6f7767..3358d7918f4b08 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18928,6 +18928,30 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
   llvm_unreachable("TLS not implemented for this target.");
 }
 
+bool X86TargetLowering::addressingModeSupportsTLS(const GlobalValue &GV) const {
+  if (Subtarget.is64Bit() && Subtarget.isTargetELF()) {
+    const TargetMachine &TM = getTargetMachine();
+    TLSModel::Model Model = TM.getTLSModel(&GV);
+    switch (Model) {
+    case TLSModel::LocalExec:
+    case TLSModel::InitialExec:
+      // We can include the %fs segment register in addressing modes.
+      return true;
+    case TLSModel::LocalDynamic:
+    case TLSModel::GeneralDynamic:
+      // These models do not result in %fs relative addresses unless
+      // TLS descriptors are used.
+      //
+      // Even in the case of TLS descriptors we currently have no way to model
+      // the difference between %fs access and the computations needed for the
+      // offset and returning `true` for TLS-desc currently duplicates both
+      // which is detrimental :-/
+      return false;
+    }
+  }
+  return false;
+}
+
 /// Lower SRA_PARTS and friends, which return two i32 values
 /// and take a 2 x i32 value to shift plus a shift amount.
 /// TODO: Can this be moved to general expansion code?
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 0a1e8ca4427314..e348ba6e8ac085 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1323,6 +1323,8 @@ namespace llvm {
                                Type *Ty, unsigned AS,
                                Instruction *I = nullptr) const override;
 
+    bool addressingModeSupportsTLS(const GlobalValue &GV) const override;
+
     /// Return true if the specified immediate is legal
     /// icmp immediate, that is the target has icmp instructions which can
     /// compare a register against the immediate without having to materialize
diff --git a/llvm/test/CodeGen/X86/codegen-prepare-addrmode-tls.ll b/llvm/test/CodeGen/X86/codegen-prepare-addrmode-tls.ll
index d49885c127bc6e..06b2585a477564 100644
--- a/llvm/test/CodeGen/X86/codegen-prepare-addrmode-tls.ll
+++ b/llvm/test/CodeGen/X86/codegen-prepare-addrmode-tls.ll
@@ -13,65 +13,47 @@ declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
 define i32 @func_local_tls(i32 %arg0, i32 %arg1) nounwind {
 ; NOPIC-LABEL: func_local_tls:
 ; NOPIC:       # %bb.0: # %entry
-; NOPIC-NEXT:    pushq %r14
 ; NOPIC-NEXT:    pushq %rbx
-; NOPIC-NEXT:    pushq %rax
 ; NOPIC-NEXT:    movl %fs:foo_local at TPOFF, %ebx
 ; NOPIC-NEXT:    testl %edi, %edi
 ; NOPIC-NEXT:    movl %ebx, %eax
 ; NOPIC-NEXT:    jne .LBB0_2
 ; NOPIC-NEXT:  # %bb.1: # %if.then
-; NOPIC-NEXT:    movq %fs:0, %rax
-; NOPIC-NEXT:    leaq foo_local at TPOFF(%rax), %r14
 ; NOPIC-NEXT:    callq effect at PLT
-; NOPIC-NEXT:    movl (%r14), %eax
+; NOPIC-NEXT:    movl %fs:foo_local at TPOFF, %eax
 ; NOPIC-NEXT:  .LBB0_2: # %if.end
 ; NOPIC-NEXT:    addl %ebx, %eax
-; NOPIC-NEXT:    addq $8, %rsp
 ; NOPIC-NEXT:    popq %rbx
-; NOPIC-NEXT:    popq %r14
 ; NOPIC-NEXT:    retq
 ;
 ; PIC-LABEL: func_local_tls:
 ; PIC:       # %bb.0: # %entry
-; PIC-NEXT:    pushq %r14
 ; PIC-NEXT:    pushq %rbx
-; PIC-NEXT:    pushq %rax
 ; PIC-NEXT:    movl %fs:.Lfoo_local$local at TPOFF, %ebx
 ; PIC-NEXT:    testl %edi, %edi
 ; PIC-NEXT:    movl %ebx, %eax
 ; PIC-NEXT:    jne .LBB0_2
 ; PIC-NEXT:  # %bb.1: # %if.then
-; PIC-NEXT:    movq %fs:0, %rax
-; PIC-NEXT:    leaq .Lfoo_local$local at TPOFF(%rax), %r14
 ; PIC-NEXT:    callq effect at PLT
-; PIC-NEXT:    movl (%r14), %eax
+; PIC-NEXT:    movl %fs:.Lfoo_local$local at TPOFF, %eax
 ; PIC-NEXT:  .LBB0_2: # %if.end
 ; PIC-NEXT:    addl %ebx, %eax
-; PIC-NEXT:    addq $8, %rsp
 ; PIC-NEXT:    popq %rbx
-; PIC-NEXT:    popq %r14
 ; PIC-NEXT:    retq
 ;
 ; TLSDESC-LABEL: func_local_tls:
 ; TLSDESC:       # %bb.0: # %entry
-; TLSDESC-NEXT:    pushq %r14
 ; TLSDESC-NEXT:    pushq %rbx
-; TLSDESC-NEXT:    pushq %rax
 ; TLSDESC-NEXT:    movl %fs:.Lfoo_local$local at TPOFF, %ebx
 ; TLSDESC-NEXT:    testl %edi, %edi
 ; TLSDESC-NEXT:    movl %ebx, %eax
 ; TLSDESC-NEXT:    jne .LBB0_2
 ; TLSDESC-NEXT:  # %bb.1: # %if.then
-; TLSDESC-NEXT:    movq %fs:0, %rax
-; TLSDESC-NEXT:    leaq .Lfoo_local$local at TPOFF(%rax), %r14
 ; TLSDESC-NEXT:    callq effect at PLT
-; TLSDESC-NEXT:    movl (%r14), %eax
+; TLSDESC-NEXT:    movl %fs:.Lfoo_local$local at TPOFF, %eax
 ; TLSDESC-NEXT:  .LBB0_2: # %if.end
 ; TLSDESC-NEXT:    addl %ebx, %eax
-; TLSDESC-NEXT:    addq $8, %rsp
 ; TLSDESC-NEXT:    popq %rbx
-; TLSDESC-NEXT:    popq %r14
 ; TLSDESC-NEXT:    retq
 entry:
   %addr = tail call ptr @llvm.threadlocal.address.p0(ptr @foo_local)
@@ -106,9 +88,8 @@ define i32 @func_nonlocal_tls(i32 %arg0, i32 %arg1) nounwind {
 ; NOPIC-NEXT:    movl %ebx, %eax
 ; NOPIC-NEXT:    jne .LBB1_2
 ; NOPIC-NEXT:  # %bb.1: # %if.then
-; NOPIC-NEXT:    addq %fs:0, %r14
 ; NOPIC-NEXT:    callq effect at PLT
-; NOPIC-NEXT:    movl (%r14), %eax
+; NOPIC-NEXT:    movl %fs:(%r14), %eax
 ; NOPIC-NEXT:  .LBB1_2: # %if.end
 ; NOPIC-NEXT:    addl %ebx, %eax
 ; NOPIC-NEXT:    addq $8, %rsp
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-tls.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-tls.ll
new file mode 100644
index 00000000000000..0397240421340a
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-tls.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' %s | FileCheck %s
+
+target triple = "x86_64--linux-gnu"
+
+@foo = dso_local thread_local(localexec) global i32 0, align 4
+
+declare void @effect()
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
+
+define i32 @func0(i32 %arg) {
+; CHECK-LABEL: define i32 @func0(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADDR:%.*]] = tail call ptr @llvm.threadlocal.address.p0(ptr @foo)
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i32, ptr [[ADDR]], align 4
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[ARG]], 0
+; CHECK-NEXT:    br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    tail call void @effect()
+; CHECK-NEXT:    [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @foo)
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[LOAD1]], [[IF_THEN]] ], [ [[LOAD0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[RET:%.*]] = add i32 [[PHI]], [[LOAD0]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+entry:
+  %addr = tail call ptr @llvm.threadlocal.address.p0(ptr @foo)
+  %load0 = load i32, ptr %addr, align 4
+  %cond = icmp eq i32 %arg, 0
+  br i1 %cond, label %if.then, label %if.end
+
+if.then:
+  tail call void @effect()
+  %load1 = load i32, ptr %addr, align 4
+  br label %if.end
+
+if.end:
+  %phi = phi i32 [ %load1, %if.then ], [ %load0, %entry ]
+  %ret = add i32 %phi, %load0
+  ret i32 %ret
+}
+
+define i32 @func1(i32 %arg0, i32 %arg1) {
+; CHECK-LABEL: define i32 @func1(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADDR:%.*]] = tail call ptr @llvm.threadlocal.address.p0(ptr @foo)
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i32, ptr [[ADDR]], align 4
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT:    br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    tail call void @effect()
+; CHECK-NEXT:    [[X:%.*]] = add i32 [[ARG1]], 42
+; CHECK-NEXT:    [[ADDR1:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i32 [[X]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @foo)
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[LOAD1]], [[IF_THEN]] ], [ [[LOAD0]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[RET:%.*]] = add i32 [[PHI]], [[LOAD0]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+entry:
+  %addr = tail call ptr @llvm.threadlocal.address.p0(ptr @foo)
+  %load0 = load i32, ptr %addr, align 4
+  %cond = icmp eq i32 %arg0, 0
+  br i1 %cond, label %if.then, label %if.end
+
+if.then:
+  tail call void @effect()
+  %x = add i32 %arg1, 42
+  %addr1 = getelementptr inbounds i32, ptr %addr, i32 %x
+  %load1 = load i32, ptr %addr, align 4
+  br label %if.end
+
+if.end:
+  %phi = phi i32 [ %load1, %if.then ], [ %load0, %entry ]
+  %ret = add i32 %phi, %load0
+  ret i32 %ret
+}



More information about the llvm-commits mailing list