[llvm] [AIX][TLS] Produce a faster local-exec access sequence for the "aix-small-tls" global variable attribute (PR #83053)

Felix via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 22 06:59:05 PDT 2024


================
@@ -0,0 +1,196 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN:      | FileCheck %s --check-prefix=CHECK-SMALLCM64
+; RUN: llc  -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN:      -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN:      < %s | FileCheck %s --check-prefix=CHECK-LARGECM64
+
+@mySmallLocalExecTLS6 = external thread_local(localexec) global [60 x i64], align 8
+@mySmallLocalExecTLS2 = external thread_local(localexec) global [3000 x i64], align 8 #0
+@MyTLSGDVar = thread_local global [800 x i64] zeroinitializer, align 8
+@mySmallLocalExecTLS3 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8
+@mySmallLocalExecTLS4 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
+@mySmallLocalExecTLS5 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
+
+; All local-exec accesses use a "faster" local-exec sequence directly off the thread pointer.
+define i64 @StoreLargeAccess1() #1 {
+; CHECK-SMALLCM64-LABEL: StoreLargeAccess1:
+; CHECK-SMALLCM64:       # %bb.0: # %entry
+; CHECK-SMALLCM64-NEXT:    mflr r0
+; CHECK-SMALLCM64-NEXT:    stdu r1, -48(r1)
+; CHECK-SMALLCM64-NEXT:    li r3, 212
+; CHECK-SMALLCM64-NEXT:    li r4, 203
+; CHECK-SMALLCM64-NEXT:    std r0, 64(r1)
+; CHECK-SMALLCM64-NEXT:    std r3, mySmallLocalExecTLS6[UL]@le+424(r13)
+; CHECK-SMALLCM64-NEXT:    std r4, mySmallLocalExecTLS2[UL]@le+1200(r13)
+; CHECK-SMALLCM64-NEXT:    ld r3, L..C0(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
+; CHECK-SMALLCM64-NEXT:    ld r4, L..C1(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
+; CHECK-SMALLCM64-NEXT:    bla .__tls_get_addr[PR]
+; CHECK-SMALLCM64-NEXT:    li r4, 44
+; CHECK-SMALLCM64-NEXT:    std r4, 440(r3)
+; CHECK-SMALLCM64-NEXT:    li r3, 6
+; CHECK-SMALLCM64-NEXT:    li r4, 100
+; CHECK-SMALLCM64-NEXT:    std r3, mySmallLocalExecTLS3[UL]@le+2000(r13)
+; CHECK-SMALLCM64-NEXT:    li r3, 882
+; CHECK-SMALLCM64-NEXT:    std r4, (mySmallLocalExecTLS4[UL]@le+6800)-65536(r13)
+; CHECK-SMALLCM64-NEXT:    std r3, (mySmallLocalExecTLS5[TL]@le+8400)-65536(r13)
+; CHECK-SMALLCM64-NEXT:    li r3, 1191
+; CHECK-SMALLCM64-NEXT:    addi r1, r1, 48
+; CHECK-SMALLCM64-NEXT:    ld r0, 16(r1)
+; CHECK-SMALLCM64-NEXT:    mtlr r0
+; CHECK-SMALLCM64-NEXT:    blr
+;
+; CHECK-LARGECM64-LABEL: StoreLargeAccess1:
+; CHECK-LARGECM64:       # %bb.0: # %entry
+; CHECK-LARGECM64-NEXT:    mflr r0
+; CHECK-LARGECM64-NEXT:    stdu r1, -48(r1)
+; CHECK-LARGECM64-NEXT:    li r3, 212
+; CHECK-LARGECM64-NEXT:    std r0, 64(r1)
+; CHECK-LARGECM64-NEXT:    addis r4, L..C0@u(r2)
+; CHECK-LARGECM64-NEXT:    ld r4, L..C0@l(r4)
+; CHECK-LARGECM64-NEXT:    std r3, mySmallLocalExecTLS6[UL]@le+424(r13)
+; CHECK-LARGECM64-NEXT:    li r3, 203
+; CHECK-LARGECM64-NEXT:    std r3, mySmallLocalExecTLS2[UL]@le+1200(r13)
+; CHECK-LARGECM64-NEXT:    addis r3, L..C1@u(r2)
+; CHECK-LARGECM64-NEXT:    ld r3, L..C1@l(r3)
+; CHECK-LARGECM64-NEXT:    bla .__tls_get_addr[PR]
+; CHECK-LARGECM64-NEXT:    li r4, 44
+; CHECK-LARGECM64-NEXT:    std r4, 440(r3)
+; CHECK-LARGECM64-NEXT:    li r3, 6
+; CHECK-LARGECM64-NEXT:    li r4, 100
+; CHECK-LARGECM64-NEXT:    std r3, mySmallLocalExecTLS3[UL]@le+2000(r13)
+; CHECK-LARGECM64-NEXT:    li r3, 882
+; CHECK-LARGECM64-NEXT:    std r4, (mySmallLocalExecTLS4[UL]@le+6800)-65536(r13)
+; CHECK-LARGECM64-NEXT:    std r3, (mySmallLocalExecTLS5[TL]@le+8400)-65536(r13)
+; CHECK-LARGECM64-NEXT:    li r3, 1191
+; CHECK-LARGECM64-NEXT:    addi r1, r1, 48
+; CHECK-LARGECM64-NEXT:    ld r0, 16(r1)
+; CHECK-LARGECM64-NEXT:    mtlr r0
+; CHECK-LARGECM64-NEXT:    blr
+entry:
+  %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS6)
+  %arrayidx = getelementptr inbounds [60 x i64], ptr %0, i64 0, i64 53
+  store i64 212, ptr %arrayidx, align 8
+  %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS2)
+  %arrayidx1 = getelementptr inbounds [3000 x i64], ptr %1, i64 0, i64 150
+  store i64 203, ptr %arrayidx1, align 8
+  %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
+  %arrayidx2 = getelementptr inbounds [800 x i64], ptr %2, i64 0, i64 55
+  store i64 44, ptr %arrayidx2, align 8
+  %3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS3)
+  %arrayidx3 = getelementptr inbounds [3000 x i64], ptr %3, i64 0, i64 250
+  store i64 6, ptr %arrayidx3, align 8
+  %4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS4)
+  %arrayidx4 = getelementptr inbounds [3000 x i64], ptr %4, i64 0, i64 850
+  store i64 100, ptr %arrayidx4, align 8
+  %5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS5)
+  %arrayidx5 = getelementptr inbounds [3000 x i64], ptr %5, i64 0, i64 1050
+  store i64 882, ptr %arrayidx5, align 8
+  %6 = load i64, ptr %arrayidx1, align 8
+  %7 = load i64, ptr %arrayidx3, align 8
+  %8 = load i64, ptr %arrayidx4, align 8
+  %add = add i64 %6, 882
+  %add9 = add i64 %add, %7
+  %add11 = add i64 %add9, %8
+  ret i64 %add11
+}
+
+; Since this function does not have the 'aix-small-local-exec-tls' attribute,
+; only some local-exec variables should have the small-local-exec TLS access
+; sequence (as opposed to all of them).
+define i64 @StoreLargeAccess2() {
----------------
orcguru wrote:

This test case could be updated to:

; Stores one constant into a single i64 element of each of six thread-local
; arrays, then returns the sum of three reloaded elements plus the literal 882.
; Unlike StoreLargeAccess1, this definition carries no function attribute group.
; SSA values are named (%tlsN, %loadN) rather than numbered.
define i64 @StoreLargeAccess2() {
entry:
  ; mySmallLocalExecTLS6[53] = 212 (i64 index 53 -> byte offset 424).
  %tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS6)
  %arrayidx = getelementptr inbounds [60 x i64], ptr %tls0, i64 0, i64 53
  store i64 212, ptr %arrayidx, align 8
  ; mySmallLocalExecTLS2[150] = 203 (byte offset 1200); reloaded below as %load1.
  %tls1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS2)
  %arrayidx1 = getelementptr inbounds [3000 x i64], ptr %tls1, i64 0, i64 150
  store i64 203, ptr %arrayidx1, align 8
  ; MyTLSGDVar[55] = 44 (byte offset 440). This global is declared thread_local
  ; without the localexec model, unlike the other five arrays.
  %tls2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
  %arrayidx2 = getelementptr inbounds [800 x i64], ptr %tls2, i64 0, i64 55
  store i64 44, ptr %arrayidx2, align 8
  ; mySmallLocalExecTLS3[250] = 6 (byte offset 2000); reloaded below as %load2.
  %tls3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS3)
  %arrayidx3 = getelementptr inbounds [3000 x i64], ptr %tls3, i64 0, i64 250
  store i64 6, ptr %arrayidx3, align 8
  ; mySmallLocalExecTLS4[850] = 100 (byte offset 6800); reloaded below as %load3.
  %tls4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS4)
  %arrayidx4 = getelementptr inbounds [3000 x i64], ptr %tls4, i64 0, i64 850
  store i64 100, ptr %arrayidx4, align 8
  ; mySmallLocalExecTLS5[1050] = 882 (byte offset 8400); this element is not reloaded.
  %tls5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS5)
  %arrayidx5 = getelementptr inbounds [3000 x i64], ptr %tls5, i64 0, i64 1050
  store i64 882, ptr %arrayidx5, align 8
  ; Reload the elements written through %arrayidx1, %arrayidx3 and %arrayidx4.
  %load1 = load i64, ptr %arrayidx1, align 8
  %load2 = load i64, ptr %arrayidx3, align 8
  %load3 = load i64, ptr %arrayidx4, align 8
  ; Result = %load1 + 882 + %load2 + %load3. The 882 addend is an immediate,
  ; not a load from %arrayidx5.
  %add = add i64 %load1, 882
  %add9 = add i64 %add, %load2
  %add11 = add i64 %add9, %load3
  ret i64 %add11
}




https://github.com/llvm/llvm-project/pull/83053


More information about the llvm-commits mailing list