[llvm] [RegisterPressure] Remove dead defs correctly (PR #156576)

via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 3 22:30:16 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Hongyu Chen (XChy)

<details>
<summary>Changes</summary>

Fixes https://github.com/llvm/llvm-project/issues/155807.
IIUC, when collecting operands we should remove the dead registers from `RegOpers.Defs` (using `RegOpers.DeadDefs` as the reference), rather than removing entries from `RegOpers.DeadDefs`.

In the testcase, LiveVariables analysis transforms `inlineasm rax, eax` into `inlineasm dead rax, eax` instead of `inlineasm dead rax, dead eax`. But `eax` is actually dead too, since its super-register is dead. However, RegisterOperandsCollector still considers `eax` a live register. We should therefore remove `eax` from `Defs`, instead of removing `eax` from `DeadDefs`.

This was very difficult to debug and to trace back to how the assertion is triggered. I apologize for not being able to explain the entire crash process.

---

Patch is 28.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156576.diff


11 Files Affected:

- (modified) llvm/lib/CodeGen/RegisterPressure.cpp (+4-4) 
- (added) llvm/test/CodeGen/MIR/X86/dead-register-pr76416.mir (+19) 
- (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll (+79-67) 
- (added) llvm/test/CodeGen/X86/inline-asm-pr155807.ll (+21) 
- (added) llvm/test/CodeGen/X86/inline-asm-pr76416.ll (+74) 
- (modified) llvm/test/CodeGen/X86/scalar_widen_div.ll (+5-5) 
- (modified) llvm/test/CodeGen/X86/ssub_sat_plus.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/udiv_fix.ll (+17-15) 
- (modified) llvm/test/CodeGen/X86/udiv_fix_sat.ll (+42-40) 
- (modified) llvm/test/CodeGen/X86/usub_sat_plus.ll (+2-2) 
- (modified) llvm/test/CodeGen/X86/xmulo.ll (+6-6) 


``````````diff
diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp
index 5f3789050b813..e43e459f4f247 100644
--- a/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -477,8 +477,8 @@ class RegisterOperandsCollector {
       collectOperand(*OperI);
 
     // Remove redundant physreg dead defs.
-    for (const VRegMaskOrUnit &P : RegOpers.Defs)
-      removeRegLanes(RegOpers.DeadDefs, P);
+    for (const VRegMaskOrUnit &P : RegOpers.DeadDefs)
+      removeRegLanes(RegOpers.Defs, P);
   }
 
   void collectInstrLanes(const MachineInstr &MI) const {
@@ -486,8 +486,8 @@ class RegisterOperandsCollector {
       collectOperandLanes(*OperI);
 
     // Remove redundant physreg dead defs.
-    for (const VRegMaskOrUnit &P : RegOpers.Defs)
-      removeRegLanes(RegOpers.DeadDefs, P);
+    for (const VRegMaskOrUnit &P : RegOpers.DeadDefs)
+      removeRegLanes(RegOpers.Defs, P);
   }
 
   /// Push this operand's register onto the correct vectors.
diff --git a/llvm/test/CodeGen/MIR/X86/dead-register-pr76416.mir b/llvm/test/CodeGen/MIR/X86/dead-register-pr76416.mir
new file mode 100644
index 0000000000000..340d8d6485038
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/dead-register-pr76416.mir
@@ -0,0 +1,19 @@
+# RUN: llc -mtriple=x86_64-unknown-unknown -run-pass=machine-scheduler -verify-misched -o - %s
+
+---
+name:            vga_load_state
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $rdi
+
+    %0:gr64 = COPY $rdi
+    undef %1.sub_32bit:gr64_nosp = MOV32r0 implicit-def dead $eflags
+    dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax
+    INLINEASM &"", 1 /* sideeffect attdialect */
+    dead $eax = MOV32r0 implicit-def dead $eflags
+    INLINEASM &"", 1 /* sideeffect attdialect */
+    dead $eax = MOV32r0 implicit-def dead $eflags
+    dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax
+
+...
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index 859e9244d29d2..5c8784cf19338 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -776,145 +776,157 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y, ptr %divdst) nounwi
 define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y, ptr %divdst) nounwind {
 ; X86-LABEL: vector_i128_i16:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    pushl %eax
 ; X86-NEXT:    pextrw $7, %xmm0, %eax
-; X86-NEXT:    pextrw $7, %xmm1, %esi
+; X86-NEXT:    pextrw $7, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm2
+; X86-NEXT:    divw %cx
+; X86-NEXT:    movl %eax, %esi
 ; X86-NEXT:    pextrw $6, %xmm0, %eax
-; X86-NEXT:    pextrw $6, %xmm1, %esi
+; X86-NEXT:    pextrw $6, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
+; X86-NEXT:    divw %cx
 ; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm3
-; X86-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
 ; X86-NEXT:    pextrw $5, %xmm0, %eax
-; X86-NEXT:    pextrw $5, %xmm1, %esi
+; X86-NEXT:    pextrw $5, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm4
+; X86-NEXT:    divw %cx
+; X86-NEXT:    movl %eax, %edi
 ; X86-NEXT:    pextrw $4, %xmm0, %eax
-; X86-NEXT:    pextrw $4, %xmm1, %esi
+; X86-NEXT:    pextrw $4, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm2
-; X86-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X86-NEXT:    divw %cx
+; X86-NEXT:    movl %eax, %ebx
 ; X86-NEXT:    pextrw $3, %xmm0, %eax
-; X86-NEXT:    pextrw $3, %xmm1, %esi
+; X86-NEXT:    pextrw $3, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm4
+; X86-NEXT:    divw %cx
+; X86-NEXT:    movl %eax, %ebp
 ; X86-NEXT:    pextrw $2, %xmm0, %eax
-; X86-NEXT:    pextrw $2, %xmm1, %esi
+; X86-NEXT:    pextrw $2, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm3
-; X86-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; X86-NEXT:    divw %cx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movd %esi, %xmm3
 ; X86-NEXT:    pextrw $1, %xmm0, %eax
 ; X86-NEXT:    pextrw $1, %xmm1, %esi
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    divw %si
 ; X86-NEXT:    # kill: def $ax killed $ax def $eax
+; X86-NEXT:    movd (%esp), %xmm4 # 4-byte Folded Reload
+; X86-NEXT:    # xmm4 = mem[0],zero,zero,zero
+; X86-NEXT:    movd %edi, %xmm5
+; X86-NEXT:    movd %ebx, %xmm2
+; X86-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; X86-NEXT:    movd %ebp, %xmm6
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3]
+; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; X86-NEXT:    movd %ecx, %xmm3
+; X86-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3]
 ; X86-NEXT:    movd %eax, %xmm4
 ; X86-NEXT:    movd %xmm0, %eax
-; X86-NEXT:    movd %xmm1, %esi
+; X86-NEXT:    movd %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
+; X86-NEXT:    divw %cx
 ; X86-NEXT:    # kill: def $ax killed $ax def $eax
 ; X86-NEXT:    movd %eax, %xmm5
 ; X86-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
 ; X86-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
 ; X86-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0]
-; X86-NEXT:    movdqa %xmm5, (%ecx)
+; X86-NEXT:    movdqa %xmm5, (%esi)
 ; X86-NEXT:    pmullw %xmm1, %xmm5
 ; X86-NEXT:    psubw %xmm5, %xmm0
+; X86-NEXT:    addl $4, %esp
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: vector_i128_i16:
 ; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
 ; X64-NEXT:    pextrw $7, %xmm0, %eax
 ; X64-NEXT:    pextrw $7, %xmm1, %ecx
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm2
+; X64-NEXT:    movl %eax, %ecx
 ; X64-NEXT:    pextrw $6, %xmm0, %eax
-; X64-NEXT:    pextrw $6, %xmm1, %ecx
+; X64-NEXT:    pextrw $6, %xmm1, %esi
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm3
-; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    divw %si
+; X64-NEXT:    movl %eax, %esi
 ; X64-NEXT:    pextrw $5, %xmm0, %eax
-; X64-NEXT:    pextrw $5, %xmm1, %ecx
+; X64-NEXT:    pextrw $5, %xmm1, %r8d
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm4
+; X64-NEXT:    divw %r8w
+; X64-NEXT:    movl %eax, %r8d
 ; X64-NEXT:    pextrw $4, %xmm0, %eax
-; X64-NEXT:    pextrw $4, %xmm1, %ecx
+; X64-NEXT:    pextrw $4, %xmm1, %r9d
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm2
-; X64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X64-NEXT:    divw %r9w
+; X64-NEXT:    movl %eax, %r9d
 ; X64-NEXT:    pextrw $3, %xmm0, %eax
-; X64-NEXT:    pextrw $3, %xmm1, %ecx
+; X64-NEXT:    pextrw $3, %xmm1, %r10d
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm3
+; X64-NEXT:    divw %r10w
+; X64-NEXT:    movl %eax, %r10d
 ; X64-NEXT:    pextrw $2, %xmm0, %eax
-; X64-NEXT:    pextrw $2, %xmm1, %ecx
+; X64-NEXT:    pextrw $2, %xmm1, %r11d
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm4
-; X64-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; X64-NEXT:    divw %r11w
+; X64-NEXT:    movl %eax, %r11d
 ; X64-NEXT:    pextrw $1, %xmm0, %eax
-; X64-NEXT:    pextrw $1, %xmm1, %ecx
+; X64-NEXT:    pextrw $1, %xmm1, %ebx
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
+; X64-NEXT:    divw %bx
 ; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm3
+; X64-NEXT:    movd %ecx, %xmm2
+; X64-NEXT:    movd %esi, %xmm3
+; X64-NEXT:    movd %r8d, %xmm4
+; X64-NEXT:    movd %r9d, %xmm5
+; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    movd %r10d, %xmm2
+; X64-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; X64-NEXT:    movd %r11d, %xmm3
+; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    movd %eax, %xmm2
 ; X64-NEXT:    movd %xmm0, %eax
 ; X64-NEXT:    movd %xmm1, %ecx
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divw %cx
 ; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm5
-; X64-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
-; X64-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
-; X64-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0]
-; X64-NEXT:    movdqa %xmm5, (%rdi)
-; X64-NEXT:    pmullw %xmm1, %xmm5
-; X64-NEXT:    psubw %xmm5, %xmm0
+; X64-NEXT:    movd %eax, %xmm4
+; X64-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
+; X64-NEXT:    movdqa %xmm4, (%rdi)
+; X64-NEXT:    pmullw %xmm1, %xmm4
+; X64-NEXT:    psubw %xmm4, %xmm0
+; X64-NEXT:    popq %rbx
 ; X64-NEXT:    retq
   %div = udiv <8 x i16> %x, %y
   store <8 x i16> %div, ptr %divdst, align 16
diff --git a/llvm/test/CodeGen/X86/inline-asm-pr155807.ll b/llvm/test/CodeGen/X86/inline-asm-pr155807.ll
new file mode 100644
index 0000000000000..3c10773dbb66c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/inline-asm-pr155807.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64 < %s | FileCheck %s
+
+; LiveVariable Analysis transforms (inlineasm rax, eax) to (inlineasm dead rax, eax)
+; instead of (inlineasm dead rax, dead eax). RegisterPressure fails at considering
+; eax as dead register.
+define i64 @pr155807(i64 %vecext) {
+; CHECK-LABEL: pr155807:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movabsq $705425148255374, %rax # imm = 0x2819497609C8E
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    idivq %rdi
+; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    retq
+entry:
+  tail call void asm sideeffect "", "~{rax},~{eax},~{dirflag},~{fpsr},~{flags}"()
+  %rem = srem i64 705425148255374, %vecext
+  ret i64 %rem
+}
diff --git a/llvm/test/CodeGen/X86/inline-asm-pr76416.ll b/llvm/test/CodeGen/X86/inline-asm-pr76416.ll
new file mode 100644
index 0000000000000..16b27de394bfa
--- /dev/null
+++ b/llvm/test/CodeGen/X86/inline-asm-pr76416.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -O2 -mtriple=x86_64 < %s | FileCheck %s
+%struct.anon.5.28.78.99.149.119 = type { [4 x i8] }
+
+ at vga_load_state_p = external dso_local global ptr, align 8
+ at vga_load_state_data = external dso_local global i8, align 1
+
+define dso_local void @vga_load_state() #0 {
+; CHECK-LABEL: vga_load_state:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    cmpl $3, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    jg .LBB0_3
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    incl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    cmpl $3, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    jle .LBB0_2
+; CHECK-NEXT:  .LBB0_3: # %for.end
+; CHECK-NEXT:    movl $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_4: # %for.cond1
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movq vga_load_state_p(%rip), %rax
+; CHECK-NEXT:    movslq -{{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT:    movzbl (%rax,%rcx), %eax
+; CHECK-NEXT:    movb %al, vga_load_state_data(%rip)
+; CHECK-NEXT:    leal 1(%rcx), %eax
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    jmp .LBB0_4
+entry:
+  %i = alloca i32, align 4
+  store i32 0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %0 = load i32, ptr %i, align 4
+  %cmp = icmp slt i32 %0, 4
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  call void asm sideeffect "", "{ax},~{dirflag},~{fpsr},~{flags}"(i8 0) #2
+  %1 = load i32, ptr %i, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, ptr %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  store i32 0, ptr %i, align 4
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.cond1, %for.end
+  call void asm sideeffect "", "N{dx},~{dirflag},~{fpsr},~{flags}"(i32 poison) #2
+  %2 = load ptr, ptr @vga_load_state_p, align 8
+  %regs = getelementptr inbounds %struct.anon.5.28.78.99.149.119, ptr %2, i32 0, i32 0
+  %3 = load i32, ptr %i, align 4
+  %idxprom = sext i32 %3 to i64
+  %arrayidx = getelementptr inbounds [4 x i8], ptr %regs, i64 0, i64 %idxprom
+  %4 = load i8, ptr %arrayidx, align 1
+  store i8 %4, ptr @vga_load_state_data, align 1
+  %5 = load i32, ptr %i, align 4
+  %inc5 = add nsw i32 %5, 1
+  store i32 %inc5, ptr %i, align 4
+  br label %for.cond1, !llvm.loop !1
+}
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.mustprogress"}
diff --git a/llvm/test/CodeGen/X86/scalar_widen_div.ll b/llvm/test/CodeGen/X86/scalar_widen_div.ll
index 1d98b4f62069d..d50d8ed7aaf6a 100644
--- a/llvm/test/CodeGen/X86/scalar_widen_div.ll
+++ b/llvm/test/CodeGen/X86/scalar_widen_div.ll
@@ -150,15 +150,15 @@ define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    divw %si
-; CHECK-NEXT:    # kill: def $ax killed $ax def $eax
-; CHECK-NEXT:    movd %eax, %xmm2
-; CHECK-NEXT:    pinsrw $1, %ecx, %xmm2
+; CHECK-NEXT:    movl %eax, %esi
 ; CHECK-NEXT:    pextrw $2, %xmm0, %eax
-; CHECK-NEXT:    pextrw $2, %xmm1, %ecx
+; CHECK-NEXT:    pextrw $2, %xmm1, %edi
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    divw %cx
+; CHECK-NEXT:    divw %di
 ; CHECK-NEXT:    # kill: def $ax killed $ax def $eax
+; CHECK-NEXT:    movd %esi, %xmm2
+; CHECK-NEXT:    pinsrw $1, %ecx, %xmm2
 ; CHECK-NEXT:    pinsrw $2, %eax, %xmm2
 ; CHECK-NEXT:    pextrw $3, %xmm0, %eax
 ; CHECK-NEXT:    pextrw $3, %xmm1, %ecx
diff --git a/llvm/test/CodeGen/X86/ssub_sat_plus.ll b/llvm/test/CodeGen/X86/ssub_sat_plus.ll
index 5baf7a1dac74c..8b96a8050e65e 100644
--- a/llvm/test/CodeGen/X86/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_plus.ll
@@ -105,9 +105,9 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw
 define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
 ; X86-LABEL: func8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    xorl %ecx, %ecx
 ; X86-NEXT:    cmpb %al, %dl
 ; X86-NEXT:    setns %cl
@@ -140,9 +140,9 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
 define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind {
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    shlb $4, %al
 ; X86-NEXT:    sarb $4, %al
 ; X86-NEXT:    subb %al, %cl
diff --git a/llvm/test/CodeGen/X86/udiv_fix.ll b/llvm/test/CodeGen/X86/udiv_fix.ll
index 82dfeeee13293..f49dec5714ff0 100644
--- a/llvm/test/CodeGen/X86/udiv_fix.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix.ll
@@ -243,28 +243,30 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-NEXT:    movdqa %xmm1, %xmm3
 ; X64-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
 ; X64-NEXT:    movq %xmm3, %rcx
-; X64-NEXT:    movdqa %xmm0, %xmm4
-; X64-NEXT:    punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; X64-NEXT:    psllq $31, %xmm4
-; X64-NEXT:    movq %xmm4, %rax
+; X64-NEXT:    movdqa %xmm0, %xmm3
+; X64-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    psllq $31, %xmm3
+; X64-NEXT:    movq %xmm3, %rax
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm3
-; X64-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
-; X64-NEXT:    movq %xmm4, %rax
-; X64-NEXT:    movdqa %xmm1, %xmm4
-; X64-NEXT:    psrldq {{.*#+}} xmm4 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X64-NEXT:    movq %xmm4, %rcx
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
+; X64-NEXT:    movq %xmm3, %rax
+; X64-NEXT:    movdqa %xmm1, %xmm3
+; X64-NEXT:    psrldq {{.*#+}} xmm3 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT:    movq %xmm3, %rsi
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm4
-; X64-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; X64-NEXT:    divq %rsi
+; X64-NEXT:    movq %rax, %rsi
 ; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; X64-NEXT:    psllq $31, %xmm0
 ; X64-NEXT:    movq %xmm0, %rax
-; X64-NEXT:    movd %xmm1, %ecx
+; X64-NEXT:    movd %xmm1, %edi
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divq %rcx
+; X64-NEXT:    divq %rdi
+; X64-NEXT:    movq %rcx, %xmm3
+; X64-NEXT:    movq %rsi, %xmm2
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
 ; X64-NEXT:    ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/156576


More information about the llvm-commits mailing list