[llvm] 4d7f9b7 - X86: Don't fold TEST into ADD ...@GOTTPOFF/GOTNTPOFF/INDNTPOFF
James Y Knight via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 12 14:01:49 PDT 2022
Author: James Y Knight
Date: 2022-08-12T20:52:00Z
New Revision: 4d7f9b7489cdd13aac2f3337e3d46fecbb3aaf9f
URL: https://github.com/llvm/llvm-project/commit/4d7f9b7489cdd13aac2f3337e3d46fecbb3aaf9f
DIFF: https://github.com/llvm/llvm-project/commit/4d7f9b7489cdd13aac2f3337e3d46fecbb3aaf9f.diff
LOG: X86: Don't fold TEST into ADD ...@GOTTPOFF/GOTNTPOFF/INDNTPOFF
The linker may convert such an ADD into a LEA, so we must not
use the EFLAGS output.
This causes miscompiles with -fsanitize=null after
bacdf80f42b46044262e97e98398d1bd0b75900d added
llvm.threadlocal.address -- previously, global variables were known to
be non-null, but the intrinsic is not currently known to return
nonnull. (That should be corrected, but it shouldn't've caused
miscompiles!)
Differential Revision: https://reviews.llvm.org/D131716
Added:
llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86.mir
llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86_64.mir
Modified:
llvm/lib/Target/X86/X86InstrInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index b130f897c2197..c5904f8407fee 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4082,6 +4082,17 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
NoSignFlag = false;
ClearsOverflowFlag = false;
+ // "ELF Handling for Thread-Local Storage" specifies that x86-64 GOTTPOFF, and
+ // i386 GOTNTPOFF/INDNTPOFF relocations can convert an ADD to a LEA during
+ // Initial Exec to Local Exec relaxation. In these cases, we must not depend
+ // on the EFLAGS modification of ADD actually happening in the final binary.
+ if (MI.getOpcode() == X86::ADD64rm || MI.getOpcode() == X86::ADD32rm) {
+ unsigned Flags = MI.getOperand(5).getTargetFlags();
+ if (Flags == X86II::MO_GOTTPOFF || Flags == X86II::MO_INDNTPOFF ||
+ Flags == X86II::MO_GOTNTPOFF)
+ return false;
+ }
+
switch (MI.getOpcode()) {
default: return false;
diff --git a/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86.mir b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86.mir
new file mode 100644
index 0000000000000..8d03401df9cfd
--- /dev/null
+++ b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86.mir
@@ -0,0 +1,107 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=peephole-opt -mtriple=i386-- %s -o - | FileCheck %s
+
+# Linkers may change `addq xx@GOTNTPOFF, %reg` to `leaq OFFSET(%reg), %reg`,
+# so we must not depend upon the EFLAGS output. Verify that the TEST
+# instruction won't be folded into the ADD.
+
+# NOTE: the IR will no longer actually produce the input MIR after
+# llvm.threadlocal.address intrinsic is annotated as having a nonnull
+# result.
+
+# NOTE2: the foo_nopic MIR was produced from IR with --relocation-model=static
+# while foo_pic's MIR was produced with --relocation-model=pic.
+
+--- |
+ target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
+ target triple = "i386-unknown-linux-gnu"
+
+ @x = external thread_local(initialexec) global i32, align 4
+
+ define i32 @foo_nopic() {
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr nonnull @x)
+ %cmp = icmp eq ptr %1, null
+ %zext = zext i1 %cmp to i32
+ ret i32 %zext
+ }
+
+ define i32 @foo_pic() {
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr nonnull @x)
+ %cmp = icmp eq ptr %1, null
+ %zext = zext i1 %cmp to i32
+ ret i32 %zext
+ }
+
+ ; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+ declare ptr @llvm.threadlocal.address.p0(ptr) #0
+
+ attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+
+...
+---
+name: foo_nopic
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32 }
+ - { id: 1, class: gr32 }
+ - { id: 2, class: gr8 }
+ - { id: 3, class: gr32 }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0 (%ir-block.0):
+ ; CHECK-LABEL: name: foo_nopic
+ ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
+ ; CHECK-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], $noreg, 1, $noreg, target-flags(x86-indntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
+ ; CHECK-NEXT: TEST32rr [[ADD32rm]], [[ADD32rm]], implicit-def $eflags
+ ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
+ ; CHECK-NEXT: $eax = COPY [[MOVZX32rr8_]]
+ ; CHECK-NEXT: RET 0, $eax
+ %0:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
+ %1:gr32 = ADD32rm %0, $noreg, 1, $noreg, target-flags(x86-indntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
+ TEST32rr %1, %1, implicit-def $eflags
+ %2:gr8 = SETCCr 4, implicit $eflags
+ %3:gr32 = MOVZX32rr8 killed %2
+ $eax = COPY %3
+ RET 0, $eax
+
+...
+---
+name: foo_pic
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr32_nosp }
+ - { id: 1, class: gr32 }
+ - { id: 2, class: gr32 }
+ - { id: 3, class: gr8 }
+ - { id: 4, class: gr32 }
+ - { id: 5, class: gr32 }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0 (%ir-block.0):
+ ; CHECK-LABEL: name: foo_pic
+ ; CHECK: [[MOVPC32r:%[0-9]+]]:gr32 = MOVPC32r 0, implicit $esp, implicit $ssp
+ ; CHECK-NEXT: [[ADD32ri:%[0-9]+]]:gr32_nosp = ADD32ri [[MOVPC32r]], target-flags(x86-got-absolute-address) &_GLOBAL_OFFSET_TABLE_, implicit-def $eflags
+ ; CHECK-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
+ ; CHECK-NEXT: [[ADD32rm:%[0-9]+]]:gr32 = ADD32rm [[MOV32rm]], [[ADD32ri]], 1, $noreg, target-flags(x86-gotntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
+ ; CHECK-NEXT: TEST32rr [[ADD32rm]], [[ADD32rm]], implicit-def $eflags
+ ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
+ ; CHECK-NEXT: $eax = COPY [[MOVZX32rr8_]]
+ ; CHECK-NEXT: RET 0, $eax
+ %5:gr32 = MOVPC32r 0, implicit $esp, implicit $ssp
+ %0:gr32_nosp = ADD32ri %5, target-flags(x86-got-absolute-address) &_GLOBAL_OFFSET_TABLE_, implicit-def $eflags
+ %1:gr32 = MOV32rm $noreg, 1, $noreg, 0, $gs :: (load (s32) from `ptr addrspace(256) null`, addrspace 256)
+ %2:gr32 = ADD32rm %1, %0, 1, $noreg, target-flags(x86-gotntpoff) @x, $noreg, implicit-def dead $eflags :: (load (s32) from got)
+ TEST32rr %2, %2, implicit-def $eflags
+ %3:gr8 = SETCCr 4, implicit $eflags
+ %4:gr32 = MOVZX32rr8 killed %3
+ $eax = COPY %4
+ RET 0, $eax
+...
diff --git a/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86_64.mir b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86_64.mir
new file mode 100644
index 0000000000000..dadd1be16c5b9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/peephole-nofold-tpoff-x86_64.mir
@@ -0,0 +1,61 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=peephole-opt -mtriple=x86_64-- %s -o - | FileCheck %s
+
+# Linkers may change `addq xx@GOTTPOFF, %reg` to `leaq OFFSET(%reg), %reg`,
+# so we must not depend upon the EFLAGS output. Verify that the TEST
+# instruction won't be folded into the ADD.
+
+# NOTE: the IR will no longer actually produce the input MIR after
+# llvm.threadlocal.address intrinsic is annotated as having a nonnull
+# result.
+
+--- |
+ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+ target triple = "x86_64-unknown-linux-gnu"
+
+ @x = external thread_local(initialexec) global i32, align 4
+
+ define i32 @foo() {
+ %1 = tail call ptr @llvm.threadlocal.address.p0(ptr nonnull @x)
+ %cmp = icmp eq ptr %1, null
+ %zext = zext i1 %cmp to i32
+ ret i32 %zext
+ }
+
+ ; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+ declare ptr @llvm.threadlocal.address.p0(ptr) #0
+
+ attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+
+...
+---
+name: foo
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gr64 }
+ - { id: 1, class: gr64 }
+ - { id: 2, class: gr8 }
+ - { id: 3, class: gr32 }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0 (%ir-block.0):
+ ; CHECK-LABEL: name: foo
+ ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm $noreg, 1, $noreg, 0, $fs :: (load (s64) from `ptr addrspace(257) null`, addrspace 257)
+ ; CHECK-NEXT: [[ADD64rm:%[0-9]+]]:gr64 = ADD64rm [[MOV64rm]], $rip, 1, $noreg, target-flags(x86-gottpoff) @x, $noreg, implicit-def dead $eflags :: (load (s64) from got)
+ ; CHECK-NEXT: TEST64rr [[ADD64rm]], [[ADD64rm]], implicit-def $eflags
+ ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
+ ; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
+ ; CHECK-NEXT: $eax = COPY [[MOVZX32rr8_]]
+ ; CHECK-NEXT: RET 0, $eax
+ %0:gr64 = MOV64rm $noreg, 1, $noreg, 0, $fs :: (load (s64) from `ptr addrspace(257) null`, addrspace 257)
+ %1:gr64 = ADD64rm %0, $rip, 1, $noreg, target-flags(x86-gottpoff) @x, $noreg, implicit-def dead $eflags :: (load (s64) from got)
+ TEST64rr %1, %1, implicit-def $eflags
+ %2:gr8 = SETCCr 4, implicit $eflags
+ %3:gr32 = MOVZX32rr8 killed %2
+ $eax = COPY %3
+ RET 0, $eax
+
+...
More information about the llvm-commits
mailing list