[llvm] c26dfc8 - [HACK] X86: Disable isCopyInstrImpl for undef subregister defs

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 28 10:33:34 PDT 2023


Author: Matt Arsenault
Date: 2023-07-28T13:33:28-04:00
New Revision: c26dfc81e254c78dc23579cf3d1336f77249e1f6

URL: https://github.com/llvm/llvm-project/commit/c26dfc81e254c78dc23579cf3d1336f77249e1f6
DIFF: https://github.com/llvm/llvm-project/commit/c26dfc81e254c78dc23579cf3d1336f77249e1f6.diff

LOG: [HACK] X86: Disable isCopyInstrImpl for undef subregister defs

This is a workaround for a coalescer bug where coalescing
SUBREG_TO_REG ends up losing the liveness of the high bits of the
source register. The result is an incorrect undef subregister def
instead of preserving the high values. Work around the observed
failure after the resulting mov is eliminated during allocation until
a proper fix is ready. I believe the proper fix is to make
SUBREG_TO_REG use a tied operand.

The test should catch a regression originally observed after
b7836d856206ec39509d42529f958c920368166b and should not show a
difference after a496c8be6e638ae58bb45f13113dbe3a4b7b23fd is reverted.

https://reviews.llvm.org/D156164

Added: 
    llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness-reduced.ll

Modified: 
    llvm/lib/Target/X86/X86InstrInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 10a0ccdcb02329..fd219fa5943b90 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3648,8 +3648,15 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
 
 std::optional<DestSourcePair>
 X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
-  if (MI.isMoveReg())
+  if (MI.isMoveReg()) {
+    // FIXME: Dirty hack for apparent invariant that doesn't hold when
+    // subreg_to_reg is coalesced with ordinary copies, such that the bits that
+    // were asserted as 0 are now undef.
+    if (MI.getOperand(0).isUndef() && MI.getOperand(0).getSubReg())
+      return std::nullopt;
+
     return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
+  }
   return std::nullopt;
 }
 

diff  --git a/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness-reduced.ll b/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness-reduced.ll
new file mode 100644
index 00000000000000..63061b0d851b6b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness-reduced.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=x86_64-grtev4-linux-gnu < %s | FileCheck %s
+
+; Test a bad interaction between register allocation and the register
+; coalescer. The coalescer lost the high subregister def when
+; SUBREG_TO_REG was used to implement i32->i64 zext. The allocator
+; then recognized the undef subregister defing MOV as a copy
+; instruction, resulting in the users seeing 
diff erent undef values.
+
+
+%struct.wibble = type { %struct.wombat }
+%struct.wombat = type { %struct.ham, [3 x i8] }
+%struct.ham = type { %struct.zot }
+%struct.zot = type { %struct.blam }
+%struct.blam = type { %struct.ham.0 }
+%struct.ham.0 = type { %struct.bar }
+%struct.bar = type { %struct.bar.1 }
+%struct.bar.1 = type { %struct.baz, i8 }
+%struct.baz = type { %struct.snork }
+%struct.snork = type <{ %struct.spam, i8, [3 x i8] }>
+%struct.spam = type { %struct.snork.2, %struct.snork.2 }
+%struct.snork.2 = type { i32 }
+
+define void @foo(ptr %arg3, i1 %icmp16) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_offset %rbx, -56
+; CHECK-NEXT:    .cfi_offset %r12, -48
+; CHECK-NEXT:    .cfi_offset %r13, -40
+; CHECK-NEXT:    .cfi_offset %r14, -32
+; CHECK-NEXT:    .cfi_offset %r15, -24
+; CHECK-NEXT:    movl %esi, %ebx
+; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    xorl %r15d, %r15d
+; CHECK-NEXT:    xorl %r12d, %r12d
+; CHECK-NEXT:    # implicit-def: $r13
+; CHECK-NEXT:    jmp .LBB0_2
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_1: # %bb5
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    orl $1, %r12d
+; CHECK-NEXT:    movq %r14, %r15
+; CHECK-NEXT:  .LBB0_2: # %bb7
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    callq *%rax
+; CHECK-NEXT:    movl %r13d, %r13d
+; CHECK-NEXT:    testb $1, %bl
+; CHECK-NEXT:    movl $0, %r14d
+; CHECK-NEXT:    jne .LBB0_1
+; CHECK-NEXT:  # %bb.3: # %bb17
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    xorl %r14d, %r14d
+; CHECK-NEXT:    testq %r15, %r15
+; CHECK-NEXT:    sete %r14b
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    callq *%rax
+; CHECK-NEXT:    shlq $4, %r14
+; CHECK-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
+; CHECK-NEXT:    movl %r13d, 0
+; CHECK-NEXT:    movb $0, 4
+; CHECK-NEXT:    jmp .LBB0_1
+bb:
+  br label %bb7
+
+bb5:                                              ; preds = %bb17, %bb7
+  %phi6 = phi ptr [ %getelementptr, %bb17 ], [ null, %bb7 ]
+  %add = or i32 %phi9, 1
+  %icmp = icmp eq i32 %phi9, 0
+  br label %bb7
+
+bb7:                                              ; preds = %bb5, %bb
+  %phi8 = phi ptr [ null, %bb ], [ %phi6, %bb5 ]
+  %phi9 = phi i32 [ 0, %bb ], [ %add, %bb5 ]
+  %phi10 = phi i40 [ undef, %bb ], [ %and, %bb5 ]
+  %call = call ptr null()
+  %and = and i40 %phi10, 4294967295
+  %icmp161 = icmp ugt ptr %phi8, null
+  br i1 %icmp16, label %bb5, label %bb17
+
+bb17:                                             ; preds = %bb7
+  %icmp18 = icmp eq ptr %phi8, null
+  %zext = zext i1 %icmp18 to i64
+  %call19 = call ptr null(i64 0)
+  %getelementptr = getelementptr %struct.wibble, ptr %arg3, i64 %zext
+  store i40 %and, ptr null, align 4
+  br label %bb5
+}
+
+attributes #0 = { "frame-pointer"="all" }


        


More information about the llvm-commits mailing list