[llvm] 300a550 - RegisterCoalescer: Fix implicit operand handling during rematerialize (#75271)

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 13 00:15:40 PST 2023


Author: Matt Arsenault
Date: 2023-12-13T15:15:36+07:00
New Revision: 300a55003c7ef7b3a87844a88781e4fdcba860ff

URL: https://github.com/llvm/llvm-project/commit/300a55003c7ef7b3a87844a88781e4fdcba860ff
DIFF: https://github.com/llvm/llvm-project/commit/300a55003c7ef7b3a87844a88781e4fdcba860ff.diff

LOG: RegisterCoalescer: Fix implicit operand handling during rematerialize (#75271)

If the rematerialize was placing a subregister into a super register,
and implicit operands referenced the original register, we need to add
undef flags to the now-subregister indexed implicit operands.

Depends #75152

Added: 
    llvm/test/CodeGen/X86/coalescer-dead-flag-verifier-error.ll
    llvm/test/CodeGen/X86/coalescer-remat-with-undef-implicit-def-operand.mir

Modified: 
    llvm/lib/CodeGen/RegisterCoalescer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 2ef91e78f3370..c1af37c8510ff 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1611,8 +1611,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
         LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
   }
 
-  if (NewMI.getOperand(0).getSubReg())
-    NewMI.getOperand(0).setIsUndef();
+  NewMI.setRegisterDefReadUndef(NewMI.getOperand(0).getReg());
 
   // Transfer over implicit operands to the rematerialized instruction.
   for (MachineOperand &MO : ImplicitOps)

diff  --git a/llvm/test/CodeGen/X86/coalescer-dead-flag-verifier-error.ll b/llvm/test/CodeGen/X86/coalescer-dead-flag-verifier-error.ll
new file mode 100644
index 0000000000000..4a396599a4444
--- /dev/null
+++ b/llvm/test/CodeGen/X86/coalescer-dead-flag-verifier-error.ll
@@ -0,0 +1,131 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=x86_64-pc-linux-gnu -verify-coalescing < %s | FileCheck %s
+
+%"class.llvm::APInt." = type <{ %union.anon., i32, [4 x i8] }>
+%union.anon. = type { i64 }
+
+define void @_ZNK4llvm5APInt21multiplicativeInverseERKS0_(ptr %r) {
+; CHECK-LABEL: _ZNK4llvm5APInt21multiplicativeInverseERKS0_:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    jmp .LBB0_1
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_4: # %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    movl %edx, %edx
+; CHECK-NEXT:    shlq $4, %rdx
+; CHECK-NEXT:    movl $0, (%rdi,%rdx)
+; CHECK-NEXT:    movl %ecx, %edx
+; CHECK-NEXT:  .LBB0_1: # %bb
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB0_3 Depth 2
+; CHECK-NEXT:    xorl $1, %ecx
+; CHECK-NEXT:    xorl %esi, %esi
+; CHECK-NEXT:    movq %rcx, %r8
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    jne .LBB0_4
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_3: # %for.body.i.i.i.i.i.3
+; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    orq $1, %r8
+; CHECK-NEXT:    orq $1, %rsi
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    je .LBB0_3
+; CHECK-NEXT:    jmp .LBB0_4
+entry:
+  br label %bb
+
+bb:                                               ; preds = %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %xor, %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i ]
+  %xor = xor i32 %i.0, 1
+  %idxprom = zext nneg i32 %xor to i64
+  br label %for.body.i.i.i.i.i
+
+for.body.i.i.i.i.i:                               ; preds = %for.body.i.i.i.i.i.3, %bb
+  %lsr.iv37 = phi i64 [ %lsr.iv.next38, %for.body.i.i.i.i.i.3 ], [ 0, %bb ]
+  %lsr.iv = phi i64 [ %lsr.iv.next, %for.body.i.i.i.i.i.3 ], [ %idxprom, %bb ]
+  %exitcond.not.i.i.i.i.i.2 = icmp eq i64 0, 1
+  br i1 %exitcond.not.i.i.i.i.i.2, label %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, label %for.body.i.i.i.i.i.3
+
+for.body.i.i.i.i.i.3:                             ; preds = %for.body.i.i.i.i.i
+  %sunkaddr55 = mul i64 %lsr.iv37, 0
+  %i = xor i64 %lsr.iv, 0
+  %lsr.iv.next = or i64 %lsr.iv, 1
+  %lsr.iv.next38 = or i64 %lsr.iv37, 1
+  br label %for.body.i.i.i.i.i
+
+_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i:        ; preds = %for.body.i.i.i.i.i
+  %idxprom12 = zext nneg i32 %i.0 to i64
+  %arrayidx13 = getelementptr [2 x %"class.llvm::APInt."], ptr %r, i64 0, i64 %idxprom12
+  store i32 0, ptr %arrayidx13, align 4
+  br label %bb
+}
+
+; This variant hit an assert and never reached the verifier error
+define void @_ZNK4llvm5APInt21multiplicativeInverseERKS0__assert(ptr %r) {
+; CHECK-LABEL: _ZNK4llvm5APInt21multiplicativeInverseERKS0__assert:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    jmp .LBB1_1
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB1_4: # %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i
+; CHECK-NEXT:    # in Loop: Header=BB1_1 Depth=1
+; CHECK-NEXT:    movl %edx, %edx
+; CHECK-NEXT:    shlq $4, %rdx
+; CHECK-NEXT:    movl $0, (%rdi,%rdx)
+; CHECK-NEXT:    movl %ecx, %edx
+; CHECK-NEXT:  .LBB1_1: # %bb
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB1_3 Depth 2
+; CHECK-NEXT:    xorl $1, %ecx
+; CHECK-NEXT:    movq %rcx, %rsi
+; CHECK-NEXT:    shlq $4, %rsi
+; CHECK-NEXT:    movq (%rsi), %rsi
+; CHECK-NEXT:    xorl %r8d, %r8d
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    jne .LBB1_4
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB1_3: # %for.body.i.i.i.i.i.3
+; CHECK-NEXT:    # Parent Loop BB1_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    orq $1, %rsi
+; CHECK-NEXT:    orq $1, %r8
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    je .LBB1_3
+; CHECK-NEXT:    jmp .LBB1_4
+entry:
+  br label %bb
+
+bb:                                               ; preds = %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %xor, %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i ]
+  %xor = xor i32 %i.0, 1
+  %idxprom = zext nneg i32 %xor to i64
+  %arrayidx = getelementptr [2 x %"class.llvm::APInt."], ptr null, i64 0, i64 %idxprom
+  %i.i.i.i.i.i = load ptr, ptr %arrayidx, align 16
+  %i.i.i.i.i.i36 = ptrtoint ptr %i.i.i.i.i.i to i64
+  br label %for.body.i.i.i.i.i
+
+for.body.i.i.i.i.i:                               ; preds = %for.body.i.i.i.i.i.3, %bb
+  %lsr.iv37 = phi i64 [ %lsr.iv.next38, %for.body.i.i.i.i.i.3 ], [ 0, %bb ]
+  %lsr.iv = phi i64 [ %lsr.iv.next, %for.body.i.i.i.i.i.3 ], [ %i.i.i.i.i.i36, %bb ]
+  %exitcond.not.i.i.i.i.i.2 = icmp eq i64 0, 1
+  br i1 %exitcond.not.i.i.i.i.i.2, label %_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i, label %for.body.i.i.i.i.i.3
+
+for.body.i.i.i.i.i.3:                             ; preds = %for.body.i.i.i.i.i
+  %sunkaddr55 = mul i64 %lsr.iv37, 0
+  %i = xor i64 %lsr.iv, 0
+  %lsr.iv.next = or i64 %lsr.iv, 1
+  %lsr.iv.next38 = or i64 %lsr.iv37, 1
+  br label %for.body.i.i.i.i.i
+
+_ZNK4llvm5APInt13getActiveBitsEv.exit.i.i:        ; preds = %for.body.i.i.i.i.i
+  %idxprom12 = zext nneg i32 %i.0 to i64
+  %arrayidx13 = getelementptr [2 x %"class.llvm::APInt."], ptr %r, i64 0, i64 %idxprom12
+  store i32 0, ptr %arrayidx13, align 4
+  br label %bb
+}

diff  --git a/llvm/test/CodeGen/X86/coalescer-remat-with-undef-implicit-def-operand.mir b/llvm/test/CodeGen/X86/coalescer-remat-with-undef-implicit-def-operand.mir
new file mode 100644
index 0000000000000..42d17130412b6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/coalescer-remat-with-undef-implicit-def-operand.mir
@@ -0,0 +1,123 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=x86_64-pc-linux-gnu -verify-coalescing -run-pass=register-coalescer -o - %s | FileCheck %s
+
+# The %1 = MOV32r0 is rematerialized as a subregister of %2. The
+# implicit-def %1 operand needs to have an undef added, just like the
+# main result operand.
+
+---
+name:           remat_into_subregister_set_undef_implicit_operand_subregisters
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: remat_into_subregister_set_undef_implicit_operand_subregisters
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $rdi
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
+  ; CHECK-NEXT:   [[MOV32r0_1:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_1]]
+  ; CHECK-NEXT:   undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_2]].sub_32bit, implicit-def [[MOV32r0_2]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = XOR32ri [[MOV32r0_2]].sub_32bit, 1, implicit-def dead $eflags
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   JCC_1 %bb.4, 5, implicit killed undef $eflags
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead [[MOV32rr:%[0-9]+]]:gr32 = MOV32rr [[MOV32r0_1]]
+  ; CHECK-NEXT:   dead [[SHL64ri:%[0-9]+]]:gr64_nosp = SHL64ri [[MOV32r0_]], 4, implicit-def dead $eflags
+  ; CHECK-NEXT:   [[MOV32r0_1:%[0-9]+]]:gr32 = COPY [[MOV32r0_2]].sub_32bit
+  ; CHECK-NEXT:   JMP_1 %bb.1
+  bb.0:
+    liveins: $rdi
+
+    undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
+    %1:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def %1
+    undef %2.sub_32bit:gr64_with_sub_8bit = COPY %1, implicit-def %2
+
+  bb.1:
+    %2.sub_32bit:gr64_with_sub_8bit = XOR32ri %2.sub_32bit, 1, implicit-def dead $eflags
+
+  bb.2:
+    JCC_1 %bb.4, 5, implicit killed undef $eflags
+
+  bb.3:
+
+  bb.4:
+    dead %3:gr32 = MOV32rr %1
+    dead %4:gr64_nosp = SHL64ri %0, 4, implicit-def dead $eflags
+    %1:gr32 = COPY %2.sub_32bit
+    JMP_1 %bb.1
+
+...
+
+# Same, except the implicit-def on the original instruction already
+# has a subregister index.
+
+---
+name:           remat_into_subregister_set_undef_implicit_operand_subregisters_with_subreg
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: remat_into_subregister_set_undef_implicit_operand_subregisters_with_subreg
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $rdi
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   undef [[MOV32r0_:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def [[MOV32r0_]]
+  ; CHECK-NEXT:   [[MOV32r0_1:%[0-9]+]]:gr32 = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_1]].sub_8bit
+  ; CHECK-NEXT:   undef [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def undef [[MOV32r0_2]].sub_8bit, implicit-def [[MOV32r0_2]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[MOV32r0_2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = XOR32ri [[MOV32r0_2]].sub_32bit, 1, implicit-def dead $eflags
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.4(0x40000000), %bb.3(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   JCC_1 %bb.4, 5, implicit killed undef $eflags
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead [[MOV32rr:%[0-9]+]]:gr32 = MOV32rr [[MOV32r0_1]]
+  ; CHECK-NEXT:   dead [[SHL64ri:%[0-9]+]]:gr64_nosp = SHL64ri [[MOV32r0_]], 4, implicit-def dead $eflags
+  ; CHECK-NEXT:   [[MOV32r0_1:%[0-9]+]]:gr32 = COPY [[MOV32r0_2]].sub_32bit
+  ; CHECK-NEXT:   JMP_1 %bb.1
+  bb.0:
+    liveins: $rdi
+
+    undef %0.sub_32bit:gr64_with_sub_8bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
+    %1:gr32 = MOV32r0 implicit-def dead $eflags, undef implicit-def %1.sub_8bit
+    undef %2.sub_32bit:gr64_with_sub_8bit = COPY %1, implicit-def %2
+
+  bb.1:
+    %2.sub_32bit:gr64_with_sub_8bit = XOR32ri %2.sub_32bit, 1, implicit-def dead $eflags
+
+  bb.2:
+    JCC_1 %bb.4, 5, implicit killed undef $eflags
+
+  bb.3:
+
+  bb.4:
+    dead %3:gr32 = MOV32rr %1
+    dead %4:gr64_nosp = SHL64ri %0, 4, implicit-def dead $eflags
+    %1:gr32 = COPY %2.sub_32bit
+    JMP_1 %bb.1
+
+...


        


More information about the llvm-commits mailing list