[llvm] [AIX][TLS] Produce a faster local-exec access sequence for the "aix-small-tls" global variable attribute (PR #83053)
Felix via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 22 06:59:05 PDT 2024
================
@@ -0,0 +1,196 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc64-ibm-aix-xcoff < %s \
+; RUN: | FileCheck %s --check-prefix=CHECK-SMALLCM64
+; RUN: llc -verify-machineinstrs -mcpu=pwr7 -ppc-asm-full-reg-names \
+; RUN: -mtriple powerpc64-ibm-aix-xcoff --code-model=large \
+; RUN: < %s | FileCheck %s --check-prefix=CHECK-LARGECM64
+
+@mySmallLocalExecTLS6 = external thread_local(localexec) global [60 x i64], align 8
+@mySmallLocalExecTLS2 = external thread_local(localexec) global [3000 x i64], align 8 #0
+@MyTLSGDVar = thread_local global [800 x i64] zeroinitializer, align 8
+@mySmallLocalExecTLS3 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8
+@mySmallLocalExecTLS4 = internal thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
+@mySmallLocalExecTLS5 = thread_local(localexec) global [3000 x i64] zeroinitializer, align 8 #0
+declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)
+
+; All local-exec accesses use a "faster" local-exec sequence directly off the thread pointer.
+define i64 @StoreLargeAccess1() #1 {
+; CHECK-SMALLCM64-LABEL: StoreLargeAccess1:
+; CHECK-SMALLCM64: # %bb.0: # %entry
+; CHECK-SMALLCM64-NEXT: mflr r0
+; CHECK-SMALLCM64-NEXT: stdu r1, -48(r1)
+; CHECK-SMALLCM64-NEXT: li r3, 212
+; CHECK-SMALLCM64-NEXT: li r4, 203
+; CHECK-SMALLCM64-NEXT: std r0, 64(r1)
+; CHECK-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS6[UL]@le+424(r13)
+; CHECK-SMALLCM64-NEXT: std r4, mySmallLocalExecTLS2[UL]@le+1200(r13)
+; CHECK-SMALLCM64-NEXT: ld r3, L..C0(r2) # target-flags(ppc-tlsgdm) @MyTLSGDVar
+; CHECK-SMALLCM64-NEXT: ld r4, L..C1(r2) # target-flags(ppc-tlsgd) @MyTLSGDVar
+; CHECK-SMALLCM64-NEXT: bla .__tls_get_addr[PR]
+; CHECK-SMALLCM64-NEXT: li r4, 44
+; CHECK-SMALLCM64-NEXT: std r4, 440(r3)
+; CHECK-SMALLCM64-NEXT: li r3, 6
+; CHECK-SMALLCM64-NEXT: li r4, 100
+; CHECK-SMALLCM64-NEXT: std r3, mySmallLocalExecTLS3[UL]@le+2000(r13)
+; CHECK-SMALLCM64-NEXT: li r3, 882
+; CHECK-SMALLCM64-NEXT: std r4, (mySmallLocalExecTLS4[UL]@le+6800)-65536(r13)
+; CHECK-SMALLCM64-NEXT: std r3, (mySmallLocalExecTLS5[TL]@le+8400)-65536(r13)
+; CHECK-SMALLCM64-NEXT: li r3, 1191
+; CHECK-SMALLCM64-NEXT: addi r1, r1, 48
+; CHECK-SMALLCM64-NEXT: ld r0, 16(r1)
+; CHECK-SMALLCM64-NEXT: mtlr r0
+; CHECK-SMALLCM64-NEXT: blr
+;
+; CHECK-LARGECM64-LABEL: StoreLargeAccess1:
+; CHECK-LARGECM64: # %bb.0: # %entry
+; CHECK-LARGECM64-NEXT: mflr r0
+; CHECK-LARGECM64-NEXT: stdu r1, -48(r1)
+; CHECK-LARGECM64-NEXT: li r3, 212
+; CHECK-LARGECM64-NEXT: std r0, 64(r1)
+; CHECK-LARGECM64-NEXT: addis r4, L..C0@u(r2)
+; CHECK-LARGECM64-NEXT: ld r4, L..C0@l(r4)
+; CHECK-LARGECM64-NEXT: std r3, mySmallLocalExecTLS6[UL]@le+424(r13)
+; CHECK-LARGECM64-NEXT: li r3, 203
+; CHECK-LARGECM64-NEXT: std r3, mySmallLocalExecTLS2[UL]@le+1200(r13)
+; CHECK-LARGECM64-NEXT: addis r3, L..C1@u(r2)
+; CHECK-LARGECM64-NEXT: ld r3, L..C1@l(r3)
+; CHECK-LARGECM64-NEXT: bla .__tls_get_addr[PR]
+; CHECK-LARGECM64-NEXT: li r4, 44
+; CHECK-LARGECM64-NEXT: std r4, 440(r3)
+; CHECK-LARGECM64-NEXT: li r3, 6
+; CHECK-LARGECM64-NEXT: li r4, 100
+; CHECK-LARGECM64-NEXT: std r3, mySmallLocalExecTLS3[UL]@le+2000(r13)
+; CHECK-LARGECM64-NEXT: li r3, 882
+; CHECK-LARGECM64-NEXT: std r4, (mySmallLocalExecTLS4[UL]@le+6800)-65536(r13)
+; CHECK-LARGECM64-NEXT: std r3, (mySmallLocalExecTLS5[TL]@le+8400)-65536(r13)
+; CHECK-LARGECM64-NEXT: li r3, 1191
+; CHECK-LARGECM64-NEXT: addi r1, r1, 48
+; CHECK-LARGECM64-NEXT: ld r0, 16(r1)
+; CHECK-LARGECM64-NEXT: mtlr r0
+; CHECK-LARGECM64-NEXT: blr
+entry:
+ %0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS6)
+ %arrayidx = getelementptr inbounds [60 x i64], ptr %0, i64 0, i64 53
+ store i64 212, ptr %arrayidx, align 8
+ %1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS2)
+ %arrayidx1 = getelementptr inbounds [3000 x i64], ptr %1, i64 0, i64 150
+ store i64 203, ptr %arrayidx1, align 8
+ %2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
+ %arrayidx2 = getelementptr inbounds [800 x i64], ptr %2, i64 0, i64 55
+ store i64 44, ptr %arrayidx2, align 8
+ %3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS3)
+ %arrayidx3 = getelementptr inbounds [3000 x i64], ptr %3, i64 0, i64 250
+ store i64 6, ptr %arrayidx3, align 8
+ %4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS4)
+ %arrayidx4 = getelementptr inbounds [3000 x i64], ptr %4, i64 0, i64 850
+ store i64 100, ptr %arrayidx4, align 8
+ %5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS5)
+ %arrayidx5 = getelementptr inbounds [3000 x i64], ptr %5, i64 0, i64 1050
+ store i64 882, ptr %arrayidx5, align 8
+ %6 = load i64, ptr %arrayidx1, align 8
+ %7 = load i64, ptr %arrayidx3, align 8
+ %8 = load i64, ptr %arrayidx4, align 8
+ %add = add i64 %6, 882
+ %add9 = add i64 %add, %7
+ %add11 = add i64 %add9, %8
+ ret i64 %add11
+}
+
+; Since this function does not have the 'aix-small-local-exec-tls' attribute,
+; only some local-exec variables should have the small-local-exec TLS access
+; sequence (as opposed to all of them).
+define i64 @StoreLargeAccess2() {
----------------
orcguru wrote:
This test case could be updated to:
define i64 @StoreLargeAccess2() {
entry:
%tls0 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS6)
%arrayidx = getelementptr inbounds [60 x i64], ptr %tls0, i64 0, i64 53
store i64 212, ptr %arrayidx, align 8
%tls1 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS2)
%arrayidx1 = getelementptr inbounds [3000 x i64], ptr %tls1, i64 0, i64 150
store i64 203, ptr %arrayidx1, align 8
%tls2 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @MyTLSGDVar)
%arrayidx2 = getelementptr inbounds [800 x i64], ptr %tls2, i64 0, i64 55
store i64 44, ptr %arrayidx2, align 8
%tls3 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS3)
%arrayidx3 = getelementptr inbounds [3000 x i64], ptr %tls3, i64 0, i64 250
store i64 6, ptr %arrayidx3, align 8
%tls4 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS4)
%arrayidx4 = getelementptr inbounds [3000 x i64], ptr %tls4, i64 0, i64 850
store i64 100, ptr %arrayidx4, align 8
%tls5 = tail call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @mySmallLocalExecTLS5)
%arrayidx5 = getelementptr inbounds [3000 x i64], ptr %tls5, i64 0, i64 1050
store i64 882, ptr %arrayidx5, align 8
%load1 = load i64, ptr %arrayidx1, align 8
%load2 = load i64, ptr %arrayidx3, align 8
%load3 = load i64, ptr %arrayidx4, align 8
%add = add i64 %load1, 882
%add9 = add i64 %add, %load2
%add11 = add i64 %add9, %load3
ret i64 %add11
}
https://github.com/llvm/llvm-project/pull/83053
More information about the llvm-commits
mailing list