[llvm] 4d7f9b7 - X86: Don't fold TEST into ADD ...@GOTTPOFF/GOTNTPOFF/INDNTPOFF

James Y Knight via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 12 14:01:49 PDT 2022


Author: James Y Knight
Date: 2022-08-12T20:52:00Z
New Revision: 4d7f9b7489cdd13aac2f3337e3d46fecbb3aaf9f

URL: https://github.com/llvm/llvm-project/commit/4d7f9b7489cdd13aac2f3337e3d46fecbb3aaf9f
DIFF: https://github.com/llvm/llvm-project/commit/4d7f9b7489cdd13aac2f3337e3d46fecbb3aaf9f.diff

LOG: X86: Don't fold TEST into ADD ...@GOTTPOFF/GOTNTPOFF/INDNTPOFF

The linker may convert such an ADD into a LEA, so we must not
use the EFLAGS output.

This causes miscompiles with -fsanitize=null after
bacdf80f42b46044262e97e98398d1bd0b75900d added
llvm.threadlocal.address -- previously, global variables were known to
be non-null, but the intrinsic is not currently known to return
nonnull. (That should be corrected, but it shouldn't've caused
miscompiles!)

Differential Revision: https://reviews.llvm.org/D131716

Added: 
    llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86.mir
    llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86_64.mir

Modified: 
    llvm/lib/Target/X86/X86InstrInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index b130f897c2197..c5904f8407fee 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4082,6 +4082,17 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
   NoSignFlag = false;
   ClearsOverflowFlag = false;
 
+  // "ELF Handling for Thread-Local Storage" specifies that x86-64 GOTTPOFF, and
+  // i386 GOTNTPOFF/INDNTPOFF relocations can convert an ADD to a LEA during
+  // Initial Exec to Local Exec relaxation. In these cases, we must not depend
+  // on the EFLAGS modification of ADD actually happening in the final binary.
+  if (MI.getOpcode() == X86::ADD64rm || MI.getOpcode() == X86::ADD32rm) {
+    unsigned Flags = MI.getOperand(5).getTargetFlags();
+    if (Flags == X86II::MO_GOTTPOFF || Flags == X86II::MO_INDNTPOFF ||
+        Flags == X86II::MO_GOTNTPOFF)
+      return false;
+  }
+
   switch (MI.getOpcode()) {
   default: return false;
 

diff  --git a/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86.mir b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86.mir
new file mode 100644
index 0000000000000..8d03401df9cfd
--- /dev/null
+++ b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86.mir
@@ -0,0 +1,107 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=peephole-opt -mtriple=i386-- %s -o - | FileCheck %s
+
+# Linkers may change `addl xx@GOTNTPOFF, %reg` to `leal OFFSET(%reg), %reg`,
+# so we must not depend upon the EFLAGS output. Verify that the TEST
+# instruction won't be folded into the ADD.
+
+# NOTE: the IR will no longer actually produce the input MIR after
+# llvm.threadlocal.address intrinsic is annotated as having a nonnull
+# result.
+
+# NOTE2: the foo_nopic MIR was produced from IR with --relocation-model=static
+# while foo_pic's MIR was produced with --relocation-model=pic.
+
+--- |
+  target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
+  target triple = "i386-unknown-linux-gnu"
+
+  @x = external thread_local(initialexec) global i32, align 4
+
+  define i32 @foo_nopic() {
+    %1 = tail call ptr @llvm.threadlocal.address.p0(ptr nonnull @x)
+    %cmp = icmp eq ptr %1, null
+    %zext = zext i1 %cmp to i32
+    ret i32 %zext
+  }
+
+  define i32 @foo_pic() {
+    %1 = tail call ptr @llvm.threadlocal.address.p0(ptr nonnull @x)
+    %cmp = icmp eq ptr %1, null
+    %zext = zext i1 %cmp to i32
+    ret i32 %zext
+  }
+
+  ; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+  declare ptr @llvm.threadlocal.address.p0(ptr) #0
+
+  attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+
+...
+---
+name:            foo_nopic
+alignment:       16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr32 }
+  - { id: 1, class: gr32 }
+  - { id: 2, class: gr8 }
+  - { id: 3, class: gr32 }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.0):
+    ; CHECK-LABEL: name: foo_nopic
+    ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
+    ; CHECK-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], $noreg, 1, $noreg, target-flags(x86-indntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
+    ; CHECK-NEXT: TEST32rr [[ADD32rm]], [[ADD32rm]], implicit-def $eflags
+    ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+    ; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
+    ; CHECK-NEXT: $eax = COPY [[MOVZX32rr8_]]
+    ; CHECK-NEXT: RET 0, $eax
+    %0:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
+    %1:gr32 = ADD32rm %0, $noreg, 1, $noreg, target-flags(x86-indntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
+    TEST32rr %1, %1, implicit-def $eflags
+    %2:gr8 = SETCCr 4, implicit $eflags
+    %3:gr32 = MOVZX32rr8 killed %2
+    $eax = COPY %3
+    RET 0, $eax
+
+...
+---
+name:            foo_pic
+alignment:       16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr32_nosp }
+  - { id: 1, class: gr32 }
+  - { id: 2, class: gr32 }
+  - { id: 3, class: gr8 }
+  - { id: 4, class: gr32 }
+  - { id: 5, class: gr32 }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.0):
+    ; CHECK-LABEL: name: foo_pic
+    ; CHECK: [[MOVPC32r:%[0-9]+]]:gr32 = MOVPC32r 0, implicit $esp, implicit $ssp
+    ; CHECK-NEXT: [[ADD32ri:%[0-9]+]]:gr32_nosp = ADD32ri [[MOVPC32r]], target-flags(x86-got-absolute-address) &_GLOBAL_OFFSET_TABLE_, implicit-def $eflags
+    ; CHECK-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
+    ; CHECK-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[ADD32ri]], 1, $noreg, target-flags(x86-gotntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
+    ; CHECK-NEXT: TEST32rr [[ADD32rm]], [[ADD32rm]], implicit-def $eflags
+    ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+    ; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
+    ; CHECK-NEXT: $eax = COPY [[MOVZX32rr8_]]
+    ; CHECK-NEXT: RET 0, $eax
+    %5:gr32 = MOVPC32r 0, implicit $esp, implicit $ssp
+    %0:gr32_nosp = ADD32ri %5, target-flags(x86-got-absolute-address) &_GLOBAL_OFFSET_TABLE_, implicit-def $eflags
+    %1:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
+    %2:gr32 = ADD32rm %1, %0, 1, $noreg, target-flags(x86-gotntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
+    TEST32rr %2, %2, implicit-def $eflags
+    %3:gr8 = SETCCr 4, implicit $eflags
+    %4:gr32 = MOVZX32rr8 killed %3
+    $eax = COPY %4
+    RET 0, $eax
+...

diff  --git a/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86_64.mir b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86_64.mir
new file mode 100644
index 0000000000000..dadd1be16c5b9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86_64.mir
@@ -0,0 +1,61 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=peephole-opt -mtriple=x86_64-- %s -o - | FileCheck %s
+
+# Linkers may change `addq xx@GOTTPOFF, %reg` to `leaq OFFSET(%reg), %reg`,
+# so we must not depend upon the EFLAGS output. Verify that the TEST
+# instruction won't be folded into the ADD.
+
+# NOTE: the IR will no longer actually produce the input MIR after
+# llvm.threadlocal.address intrinsic is annotated as having a nonnull
+# result.
+
+--- |
+  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-unknown-linux-gnu"
+
+  @x = external thread_local(initialexec) global i32, align 4
+
+  define i32 @foo() {
+    %1 = tail call ptr @llvm.threadlocal.address.p0(ptr nonnull @x)
+    %cmp = icmp eq ptr %1, null
+    %zext = zext i1 %cmp to i32
+    ret i32 %zext
+  }
+
+  ; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+  declare ptr @llvm.threadlocal.address.p0(ptr) #0
+
+  attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+
+...
+---
+name:            foo
+alignment:       16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr64 }
+  - { id: 1, class: gr64 }
+  - { id: 2, class: gr8 }
+  - { id: 3, class: gr32 }
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0 (%ir-block.0):
+    ; CHECK-LABEL: name: foo
+    ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm $noreg, 1, $noreg, 0, $fs :: (load (s64) from `ptr addrspace(257) null`, addrspace 257)
+    ; CHECK-NEXT: [[ADD64rm:%[0-9]+]]:gr64 = ADD64rm [[MOV64rm]], $rip, 1, $noreg, target-flags(x86-gottpoff) @x, $noreg, implicit-def dead $eflags :: (load (s64) from got)
+    ; CHECK-NEXT: TEST64rr [[ADD64rm]], [[ADD64rm]], implicit-def $eflags
+    ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+    ; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
+    ; CHECK-NEXT: $eax = COPY [[MOVZX32rr8_]]
+    ; CHECK-NEXT: RET 0, $eax
+    %0:gr64 = MOV64rm $noreg, 1, $noreg, 0, $fs :: (load (s64) from `ptr addrspace(257) null`, addrspace 257)
+    %1:gr64 = ADD64rm %0, $rip, 1, $noreg, target-flags(x86-gottpoff) @x, $noreg, implicit-def dead $eflags :: (load (s64) from got)
+    TEST64rr %1, %1, implicit-def $eflags
+    %2:gr8 = SETCCr 4, implicit $eflags
+    %3:gr32 = MOVZX32rr8 killed %2
+    $eax = COPY %3
+    RET 0, $eax
+
+...


        


More information about the llvm-commits mailing list