[llvm] f1ae96d - [AArch64][GlobalISel] Fix TLS accesses clobbering registers incorrectly.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 21 16:01:29 PDT 2020


Author: Amara Emerson
Date: 2020-07-21T16:01:17-07:00
New Revision: f1ae96d9bfb807ded8019e851f2663ef18a4130c

URL: https://github.com/llvm/llvm-project/commit/f1ae96d9bfb807ded8019e851f2663ef18a4130c
DIFF: https://github.com/llvm/llvm-project/commit/f1ae96d9bfb807ded8019e851f2663ef18a4130c.diff

LOG: [AArch64][GlobalISel] Fix TLS accesses clobbering registers incorrectly.

This was happening because the BLR didn't have a use of the X0 arg register,
which would end up being re-used in high reg pressure situations.
The change also avoids hard coding the use of X0 for the sequence except to
copy the value for the call. ld64 should still be able to optimize it.

rdar://65438258

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    llvm/test/CodeGen/AArch64/arm64-tls-darwin.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 8a6f79893527..eb6a4aa3d826 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2945,17 +2945,20 @@ bool AArch64InstructionSelector::selectTLSGlobalValue(
   const GlobalValue &GV = *I.getOperand(1).getGlobal();
   MachineIRBuilder MIB(I);
 
-  MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
-      .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
+  auto LoadGOT =
+      MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
+          .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
 
   auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
-                             {Register(AArch64::X0)})
+                             {LoadGOT.getReg(0)})
                   .addImm(0);
 
+  MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
   // TLS calls preserve all registers except those that absolutely must be
   // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
   // silly).
   MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
+      .addUse(AArch64::X0, RegState::Implicit)
       .addDef(AArch64::X0, RegState::Implicit)
       .addRegMask(TRI.getTLSCallPreservedMask());
 

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll b/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll
new file mode 100644
index 000000000000..cbeac5d85fc4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll
@@ -0,0 +1,207 @@
+; RUN: llc -mtriple aarch64-apple-darwin -O0 -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios13.0.0"
+
+@t_val = thread_local global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"str1\00", align 1
+@str1 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), align 8
+@.str.1 = private unnamed_addr constant [5 x i8] c"str2\00", align 1
+@str2 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.1, i32 0, i32 0), align 8
+@.str.2 = private unnamed_addr constant [5 x i8] c"str3\00", align 1
+@str3 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.2, i32 0, i32 0), align 8
+@.str.3 = private unnamed_addr constant [5 x i8] c"str4\00", align 1
+@str4 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.3, i32 0, i32 0), align 8
+@.str.4 = private unnamed_addr constant [5 x i8] c"str5\00", align 1
+@str5 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.4, i32 0, i32 0), align 8
+@.str.5 = private unnamed_addr constant [5 x i8] c"str6\00", align 1
+@str6 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i32 0, i32 0), align 8
+@.str.6 = private unnamed_addr constant [5 x i8] c"str7\00", align 1
+@str7 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.6, i32 0, i32 0), align 8
+@.str.7 = private unnamed_addr constant [5 x i8] c"str8\00", align 1
+@str8 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.7, i32 0, i32 0), align 8
+@.str.8 = private unnamed_addr constant [5 x i8] c"str9\00", align 1
+@str9 = global i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.8, i32 0, i32 0), align 8
+@.str.9 = private unnamed_addr constant [6 x i8] c"str10\00", align 1
+@str10 = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.9, i32 0, i32 0), align 8
+@.str.10 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
+@.str.11 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
+@.str.12 = private unnamed_addr constant [4 x i8] c"xyz\00", align 1
+
+
+; This test checks that we don't re-use the register for the variable descriptor
+; for the second ldr.
+; CHECK:        adrp	x[[PTR1:[0-9]+]], _t_val@TLVPPAGE
+; CHECK:	ldr	x[[PTR1]], [x[[PTR1]], _t_val@TLVPPAGEOFF]
+; CHECK:	ldr	x[[FPTR:[0-9]+]], [x[[PTR1]]]
+; CHECK:        mov	x0, x[[PTR1]]
+; CHECK:        blr     x[[FPTR]]
+
+define void @_Z4funcPKc(i8* %id) {
+entry:
+  %id.addr = alloca i8*, align 8
+  store i8* %id, i8** %id.addr, align 8
+  %0 = load i8*, i8** %id.addr, align 8
+  %1 = load i8*, i8** @str1, align 8
+  %cmp = icmp eq i8* %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %2 = load i8*, i8** @str1, align 8
+  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %2)
+  %3 = load i8*, i8** @str2, align 8
+  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %3)
+  %4 = load i8*, i8** @str3, align 8
+  %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %4)
+  %5 = load i8*, i8** @str4, align 8
+  %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %5)
+  %6 = load i8*, i8** @str5, align 8
+  %call4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %6)
+  %7 = load i8*, i8** @str6, align 8
+  %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %7)
+  %8 = load i8*, i8** @str7, align 8
+  %call6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %8)
+  %9 = load i8*, i8** @str8, align 8
+  %call7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %9)
+  %10 = load i8*, i8** @str9, align 8
+  %call8 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %10)
+  %11 = load i8*, i8** @str10, align 8
+  %call9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %11)
+  %12 = load i32, i32* @t_val, align 4
+  %call10 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.11, i64 0, i64 0), i32 %12)
+  br label %if.end56
+
+if.else:                                          ; preds = %entry
+  %13 = load i8*, i8** %id.addr, align 8
+  %14 = load i8*, i8** @str2, align 8
+  %cmp11 = icmp eq i8* %13, %14
+  br i1 %cmp11, label %if.then12, label %if.else24
+
+if.then12:                                        ; preds = %if.else
+  %15 = load i8*, i8** @str1, align 8
+  %call13 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %15)
+  %16 = load i8*, i8** @str2, align 8
+  %call14 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %16)
+  %17 = load i8*, i8** @str3, align 8
+  %call15 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %17)
+  %18 = load i8*, i8** @str4, align 8
+  %call16 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %18)
+  %19 = load i8*, i8** @str5, align 8
+  %call17 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %19)
+  %20 = load i8*, i8** @str6, align 8
+  %call18 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %20)
+  %21 = load i8*, i8** @str7, align 8
+  %call19 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %21)
+  %22 = load i8*, i8** @str8, align 8
+  %call20 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %22)
+  %23 = load i8*, i8** @str9, align 8
+  %call21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %23)
+  %24 = load i8*, i8** @str10, align 8
+  %call22 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.10, i64 0, i64 0), i8* %24)
+  %25 = load i32, i32* @t_val, align 4
+  %call23 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.11, i64 0, i64 0), i32 %25)
+  br label %if.end55
+
+if.else24:                                        ; preds = %if.else
+  %26 = load i8*, i8** %id.addr, align 8
+  %27 = load i8*, i8** @str3, align 8
+  %cmp25 = icmp eq i8* %26, %27
+  br i1 %cmp25, label %if.then26, label %if.else27
+
+if.then26:                                        ; preds = %if.else24
+  br label %if.end54
+
+if.else27:                                        ; preds = %if.else24
+  %28 = load i8*, i8** %id.addr, align 8
+  %29 = load i8*, i8** @str4, align 8
+  %cmp28 = icmp eq i8* %28, %29
+  br i1 %cmp28, label %if.then29, label %if.else30
+
+if.then29:                                        ; preds = %if.else27
+  br label %if.end53
+
+if.else30:                                        ; preds = %if.else27
+  %30 = load i8*, i8** %id.addr, align 8
+  %31 = load i8*, i8** @str5, align 8
+  %cmp31 = icmp eq i8* %30, %31
+  br i1 %cmp31, label %if.then32, label %if.else33
+
+if.then32:                                        ; preds = %if.else30
+  br label %if.end52
+
+if.else33:                                        ; preds = %if.else30
+  %32 = load i8*, i8** %id.addr, align 8
+  %33 = load i8*, i8** @str6, align 8
+  %cmp34 = icmp eq i8* %32, %33
+  br i1 %cmp34, label %if.then35, label %if.else36
+
+if.then35:                                        ; preds = %if.else33
+  br label %if.end51
+
+if.else36:                                        ; preds = %if.else33
+  %34 = load i8*, i8** %id.addr, align 8
+  %35 = load i8*, i8** @str7, align 8
+  %cmp37 = icmp eq i8* %34, %35
+  br i1 %cmp37, label %if.then38, label %if.else39
+
+if.then38:                                        ; preds = %if.else36
+  br label %if.end50
+
+if.else39:                                        ; preds = %if.else36
+  %36 = load i8*, i8** %id.addr, align 8
+  %37 = load i8*, i8** @str8, align 8
+  %cmp40 = icmp eq i8* %36, %37
+  br i1 %cmp40, label %if.then41, label %if.else42
+
+if.then41:                                        ; preds = %if.else39
+  br label %if.end49
+
+if.else42:                                        ; preds = %if.else39
+  %38 = load i8*, i8** %id.addr, align 8
+  %39 = load i8*, i8** @str9, align 8
+  %cmp43 = icmp eq i8* %38, %39
+  br i1 %cmp43, label %if.then44, label %if.else45
+
+if.then44:                                        ; preds = %if.else42
+  br label %if.end48
+
+if.else45:                                        ; preds = %if.else42
+  %40 = load i8*, i8** %id.addr, align 8
+  %41 = load i8*, i8** @str10, align 8
+  %cmp46 = icmp eq i8* %40, %41
+  br i1 %cmp46, label %if.then47, label %if.end
+
+if.then47:                                        ; preds = %if.else45
+  br label %if.end
+
+if.end:                                           ; preds = %if.then47, %if.else45
+  br label %if.end48
+
+if.end48:                                         ; preds = %if.end, %if.then44
+  br label %if.end49
+
+if.end49:                                         ; preds = %if.end48, %if.then41
+  br label %if.end50
+
+if.end50:                                         ; preds = %if.end49, %if.then38
+  br label %if.end51
+
+if.end51:                                         ; preds = %if.end50, %if.then35
+  br label %if.end52
+
+if.end52:                                         ; preds = %if.end51, %if.then32
+  br label %if.end53
+
+if.end53:                                         ; preds = %if.end52, %if.then29
+  br label %if.end54
+
+if.end54:                                         ; preds = %if.end53, %if.then26
+  br label %if.end55
+
+if.end55:                                         ; preds = %if.end54, %if.then12
+  br label %if.end56
+
+if.end56:                                         ; preds = %if.end55, %if.then
+  ret void
+}
+declare i32 @printf(i8*, ...)
+

diff  --git a/llvm/test/CodeGen/AArch64/arm64-tls-darwin.ll b/llvm/test/CodeGen/AArch64/arm64-tls-darwin.ll
index b90848bc9fae..06d214b1b0ac 100644
--- a/llvm/test/CodeGen/AArch64/arm64-tls-darwin.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-tls-darwin.ll
@@ -9,8 +9,8 @@
 define i8 @get_var() {
 ; CHECK-LABEL: get_var:
 ; CHECK: adrp x[[TLVPDESC_SLOT_HI:[0-9]+]], _var@TLVPPAGE
-; CHECK: ldr x0, [x[[TLVPDESC_SLOT_HI]], _var@TLVPPAGEOFF]
-; CHECK: ldr [[TLV_GET_ADDR:x[0-9]+]], [x0]
+; CHECK: ldr x[[PTR:[0-9]+]], [x[[TLVPDESC_SLOT_HI]], _var@TLVPPAGEOFF]
+; CHECK: ldr [[TLV_GET_ADDR:x[0-9]+]], [x[[PTR]]]
 ; CHECK: blr [[TLV_GET_ADDR]]
 ; CHECK: ldrb w0, [x0]
 


        


More information about the llvm-commits mailing list