[llvm] [RegisterPressure] Remove dead defs correctly (PR #156576)

Hongyu Chen via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 3 22:29:41 PDT 2025


https://github.com/XChy updated https://github.com/llvm/llvm-project/pull/156576

>From 649698efd37e36643e833ed59c57ae1dd56e25a9 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Wed, 3 Sep 2025 02:16:19 +0800
Subject: [PATCH 1/2] [RegisterPressure] Remove dead defs correctly

---
 llvm/lib/CodeGen/RegisterPressure.cpp         |   8 +-
 .../div-rem-pair-recomposition-unsigned.ll    | 146 ++++++++++--------
 .../CodeGen/X86/inline-asm-clobber-subreg.ll  |  18 +++
 llvm/test/CodeGen/X86/scalar_widen_div.ll     |  10 +-
 llvm/test/CodeGen/X86/ssub_sat_plus.ll        |   4 +-
 llvm/test/CodeGen/X86/udiv_fix.ll             |  32 ++--
 llvm/test/CodeGen/X86/udiv_fix_sat.ll         |  82 +++++-----
 llvm/test/CodeGen/X86/usub_sat_plus.ll        |   4 +-
 llvm/test/CodeGen/X86/xmulo.ll                |  12 +-
 9 files changed, 175 insertions(+), 141 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/inline-asm-clobber-subreg.ll

diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp
index 5f3789050b813..e43e459f4f247 100644
--- a/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -477,8 +477,8 @@ class RegisterOperandsCollector {
       collectOperand(*OperI);
 
     // Remove redundant physreg dead defs.
-    for (const VRegMaskOrUnit &P : RegOpers.Defs)
-      removeRegLanes(RegOpers.DeadDefs, P);
+    for (const VRegMaskOrUnit &P : RegOpers.DeadDefs)
+      removeRegLanes(RegOpers.Defs, P);
   }
 
   void collectInstrLanes(const MachineInstr &MI) const {
@@ -486,8 +486,8 @@ class RegisterOperandsCollector {
       collectOperandLanes(*OperI);
 
     // Remove redundant physreg dead defs.
-    for (const VRegMaskOrUnit &P : RegOpers.Defs)
-      removeRegLanes(RegOpers.DeadDefs, P);
+    for (const VRegMaskOrUnit &P : RegOpers.DeadDefs)
+      removeRegLanes(RegOpers.Defs, P);
   }
 
   /// Push this operand's register onto the correct vectors.
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index 859e9244d29d2..5c8784cf19338 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -776,145 +776,157 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y, ptr %divdst) nounwi
 define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y, ptr %divdst) nounwind {
 ; X86-LABEL: vector_i128_i16:
 ; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    pushl %eax
 ; X86-NEXT:    pextrw $7, %xmm0, %eax
-; X86-NEXT:    pextrw $7, %xmm1, %esi
+; X86-NEXT:    pextrw $7, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm2
+; X86-NEXT:    divw %cx
+; X86-NEXT:    movl %eax, %esi
 ; X86-NEXT:    pextrw $6, %xmm0, %eax
-; X86-NEXT:    pextrw $6, %xmm1, %esi
+; X86-NEXT:    pextrw $6, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
+; X86-NEXT:    divw %cx
 ; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm3
-; X86-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
 ; X86-NEXT:    pextrw $5, %xmm0, %eax
-; X86-NEXT:    pextrw $5, %xmm1, %esi
+; X86-NEXT:    pextrw $5, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm4
+; X86-NEXT:    divw %cx
+; X86-NEXT:    movl %eax, %edi
 ; X86-NEXT:    pextrw $4, %xmm0, %eax
-; X86-NEXT:    pextrw $4, %xmm1, %esi
+; X86-NEXT:    pextrw $4, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm2
-; X86-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X86-NEXT:    divw %cx
+; X86-NEXT:    movl %eax, %ebx
 ; X86-NEXT:    pextrw $3, %xmm0, %eax
-; X86-NEXT:    pextrw $3, %xmm1, %esi
+; X86-NEXT:    pextrw $3, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm4
+; X86-NEXT:    divw %cx
+; X86-NEXT:    movl %eax, %ebp
 ; X86-NEXT:    pextrw $2, %xmm0, %eax
-; X86-NEXT:    pextrw $2, %xmm1, %esi
+; X86-NEXT:    pextrw $2, %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
-; X86-NEXT:    # kill: def $ax killed $ax def $eax
-; X86-NEXT:    movd %eax, %xmm3
-; X86-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; X86-NEXT:    divw %cx
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movd %esi, %xmm3
 ; X86-NEXT:    pextrw $1, %xmm0, %eax
 ; X86-NEXT:    pextrw $1, %xmm1, %esi
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    divw %si
 ; X86-NEXT:    # kill: def $ax killed $ax def $eax
+; X86-NEXT:    movd (%esp), %xmm4 # 4-byte Folded Reload
+; X86-NEXT:    # xmm4 = mem[0],zero,zero,zero
+; X86-NEXT:    movd %edi, %xmm5
+; X86-NEXT:    movd %ebx, %xmm2
+; X86-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; X86-NEXT:    movd %ebp, %xmm6
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3]
+; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; X86-NEXT:    movd %ecx, %xmm3
+; X86-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3]
 ; X86-NEXT:    movd %eax, %xmm4
 ; X86-NEXT:    movd %xmm0, %eax
-; X86-NEXT:    movd %xmm1, %esi
+; X86-NEXT:    movd %xmm1, %ecx
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    divw %si
+; X86-NEXT:    divw %cx
 ; X86-NEXT:    # kill: def $ax killed $ax def $eax
 ; X86-NEXT:    movd %eax, %xmm5
 ; X86-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
 ; X86-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
 ; X86-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0]
-; X86-NEXT:    movdqa %xmm5, (%ecx)
+; X86-NEXT:    movdqa %xmm5, (%esi)
 ; X86-NEXT:    pmullw %xmm1, %xmm5
 ; X86-NEXT:    psubw %xmm5, %xmm0
+; X86-NEXT:    addl $4, %esp
 ; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: vector_i128_i16:
 ; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
 ; X64-NEXT:    pextrw $7, %xmm0, %eax
 ; X64-NEXT:    pextrw $7, %xmm1, %ecx
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm2
+; X64-NEXT:    movl %eax, %ecx
 ; X64-NEXT:    pextrw $6, %xmm0, %eax
-; X64-NEXT:    pextrw $6, %xmm1, %ecx
+; X64-NEXT:    pextrw $6, %xmm1, %esi
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm3
-; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    divw %si
+; X64-NEXT:    movl %eax, %esi
 ; X64-NEXT:    pextrw $5, %xmm0, %eax
-; X64-NEXT:    pextrw $5, %xmm1, %ecx
+; X64-NEXT:    pextrw $5, %xmm1, %r8d
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm4
+; X64-NEXT:    divw %r8w
+; X64-NEXT:    movl %eax, %r8d
 ; X64-NEXT:    pextrw $4, %xmm0, %eax
-; X64-NEXT:    pextrw $4, %xmm1, %ecx
+; X64-NEXT:    pextrw $4, %xmm1, %r9d
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm2
-; X64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X64-NEXT:    divw %r9w
+; X64-NEXT:    movl %eax, %r9d
 ; X64-NEXT:    pextrw $3, %xmm0, %eax
-; X64-NEXT:    pextrw $3, %xmm1, %ecx
+; X64-NEXT:    pextrw $3, %xmm1, %r10d
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm3
+; X64-NEXT:    divw %r10w
+; X64-NEXT:    movl %eax, %r10d
 ; X64-NEXT:    pextrw $2, %xmm0, %eax
-; X64-NEXT:    pextrw $2, %xmm1, %ecx
+; X64-NEXT:    pextrw $2, %xmm1, %r11d
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
-; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm4
-; X64-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; X64-NEXT:    divw %r11w
+; X64-NEXT:    movl %eax, %r11d
 ; X64-NEXT:    pextrw $1, %xmm0, %eax
-; X64-NEXT:    pextrw $1, %xmm1, %ecx
+; X64-NEXT:    pextrw $1, %xmm1, %ebx
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divw %cx
+; X64-NEXT:    divw %bx
 ; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm3
+; X64-NEXT:    movd %ecx, %xmm2
+; X64-NEXT:    movd %esi, %xmm3
+; X64-NEXT:    movd %r8d, %xmm4
+; X64-NEXT:    movd %r9d, %xmm5
+; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    movd %r10d, %xmm2
+; X64-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; X64-NEXT:    movd %r11d, %xmm3
+; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    movd %eax, %xmm2
 ; X64-NEXT:    movd %xmm0, %eax
 ; X64-NEXT:    movd %xmm1, %ecx
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divw %cx
 ; X64-NEXT:    # kill: def $ax killed $ax def $eax
-; X64-NEXT:    movd %eax, %xmm5
-; X64-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
-; X64-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
-; X64-NEXT:    punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0]
-; X64-NEXT:    movdqa %xmm5, (%rdi)
-; X64-NEXT:    pmullw %xmm1, %xmm5
-; X64-NEXT:    psubw %xmm5, %xmm0
+; X64-NEXT:    movd %eax, %xmm4
+; X64-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
+; X64-NEXT:    movdqa %xmm4, (%rdi)
+; X64-NEXT:    pmullw %xmm1, %xmm4
+; X64-NEXT:    psubw %xmm4, %xmm0
+; X64-NEXT:    popq %rbx
 ; X64-NEXT:    retq
   %div = udiv <8 x i16> %x, %y
   store <8 x i16> %div, ptr %divdst, align 16
diff --git a/llvm/test/CodeGen/X86/inline-asm-clobber-subreg.ll b/llvm/test/CodeGen/X86/inline-asm-clobber-subreg.ll
new file mode 100644
index 0000000000000..638a8eeb2d472
--- /dev/null
+++ b/llvm/test/CodeGen/X86/inline-asm-clobber-subreg.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64 < %s 2>&1 | FileCheck %s
+
+define i64 @foo(i64 %vecext) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movabsq $705425148255374, %rax # imm = 0x2819497609C8E
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    idivq %rdi
+; CHECK-NEXT:    movq %rdx, %rax
+; CHECK-NEXT:    retq
+entry:
+  tail call void asm sideeffect "", "~{rax},~{eax},~{dirflag},~{fpsr},~{flags}"()
+  %rem = srem i64 705425148255374, %vecext
+  ret i64 %rem
+}
diff --git a/llvm/test/CodeGen/X86/scalar_widen_div.ll b/llvm/test/CodeGen/X86/scalar_widen_div.ll
index 1d98b4f62069d..d50d8ed7aaf6a 100644
--- a/llvm/test/CodeGen/X86/scalar_widen_div.ll
+++ b/llvm/test/CodeGen/X86/scalar_widen_div.ll
@@ -150,15 +150,15 @@ define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    xorl %edx, %edx
 ; CHECK-NEXT:    divw %si
-; CHECK-NEXT:    # kill: def $ax killed $ax def $eax
-; CHECK-NEXT:    movd %eax, %xmm2
-; CHECK-NEXT:    pinsrw $1, %ecx, %xmm2
+; CHECK-NEXT:    movl %eax, %esi
 ; CHECK-NEXT:    pextrw $2, %xmm0, %eax
-; CHECK-NEXT:    pextrw $2, %xmm1, %ecx
+; CHECK-NEXT:    pextrw $2, %xmm1, %edi
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    divw %cx
+; CHECK-NEXT:    divw %di
 ; CHECK-NEXT:    # kill: def $ax killed $ax def $eax
+; CHECK-NEXT:    movd %esi, %xmm2
+; CHECK-NEXT:    pinsrw $1, %ecx, %xmm2
 ; CHECK-NEXT:    pinsrw $2, %eax, %xmm2
 ; CHECK-NEXT:    pextrw $3, %xmm0, %eax
 ; CHECK-NEXT:    pextrw $3, %xmm1, %ecx
diff --git a/llvm/test/CodeGen/X86/ssub_sat_plus.ll b/llvm/test/CodeGen/X86/ssub_sat_plus.ll
index 5baf7a1dac74c..8b96a8050e65e 100644
--- a/llvm/test/CodeGen/X86/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_plus.ll
@@ -105,9 +105,9 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw
 define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
 ; X86-LABEL: func8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    xorl %ecx, %ecx
 ; X86-NEXT:    cmpb %al, %dl
 ; X86-NEXT:    setns %cl
@@ -140,9 +140,9 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
 define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind {
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    shlb $4, %al
 ; X86-NEXT:    sarb $4, %al
 ; X86-NEXT:    subb %al, %cl
diff --git a/llvm/test/CodeGen/X86/udiv_fix.ll b/llvm/test/CodeGen/X86/udiv_fix.ll
index 82dfeeee13293..f49dec5714ff0 100644
--- a/llvm/test/CodeGen/X86/udiv_fix.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix.ll
@@ -243,28 +243,30 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-NEXT:    movdqa %xmm1, %xmm3
 ; X64-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
 ; X64-NEXT:    movq %xmm3, %rcx
-; X64-NEXT:    movdqa %xmm0, %xmm4
-; X64-NEXT:    punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; X64-NEXT:    psllq $31, %xmm4
-; X64-NEXT:    movq %xmm4, %rax
+; X64-NEXT:    movdqa %xmm0, %xmm3
+; X64-NEXT:    punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    psllq $31, %xmm3
+; X64-NEXT:    movq %xmm3, %rax
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm3
-; X64-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
-; X64-NEXT:    movq %xmm4, %rax
-; X64-NEXT:    movdqa %xmm1, %xmm4
-; X64-NEXT:    psrldq {{.*#+}} xmm4 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X64-NEXT:    movq %xmm4, %rcx
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
+; X64-NEXT:    movq %xmm3, %rax
+; X64-NEXT:    movdqa %xmm1, %xmm3
+; X64-NEXT:    psrldq {{.*#+}} xmm3 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT:    movq %xmm3, %rsi
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm4
-; X64-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; X64-NEXT:    divq %rsi
+; X64-NEXT:    movq %rax, %rsi
 ; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; X64-NEXT:    psllq $31, %xmm0
 ; X64-NEXT:    movq %xmm0, %rax
-; X64-NEXT:    movd %xmm1, %ecx
+; X64-NEXT:    movd %xmm1, %edi
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divq %rcx
+; X64-NEXT:    divq %rdi
+; X64-NEXT:    movq %rcx, %xmm3
+; X64-NEXT:    movq %rsi, %xmm2
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
 ; X64-NEXT:    movq %rax, %xmm2
 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
 ; X64-NEXT:    movq %xmm0, %rax
diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
index 3da5973f9f903..ad95deabb2e55 100644
--- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -320,59 +320,61 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-NEXT:    movq %xmm4, %rcx
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm8
+; X64-NEXT:    movq %rax, %rcx
 ; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
 ; X64-NEXT:    movq %xmm3, %rax
 ; X64-NEXT:    movdqa %xmm1, %xmm3
 ; X64-NEXT:    psrldq {{.*#+}} xmm3 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X64-NEXT:    movq %xmm3, %rcx
+; X64-NEXT:    movq %xmm3, %rsi
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm3
-; X64-NEXT:    punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm3[0]
-; X64-NEXT:    movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
-; X64-NEXT:    movdqa %xmm8, %xmm3
-; X64-NEXT:    pxor %xmm4, %xmm3
-; X64-NEXT:    pshufd {{.*#+}} xmm6 = xmm3[1,1,3,3]
-; X64-NEXT:    movdqa {{.*#+}} xmm7 = [2147483649,2147483649,2147483649,2147483649]
-; X64-NEXT:    pcmpeqd %xmm7, %xmm6
-; X64-NEXT:    movdqa {{.*#+}} xmm5 = [9223372043297226751,9223372043297226751]
-; X64-NEXT:    movdqa %xmm5, %xmm9
-; X64-NEXT:    pcmpgtd %xmm3, %xmm9
-; X64-NEXT:    pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
-; X64-NEXT:    pand %xmm6, %xmm10
-; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm9[1,1,3,3]
-; X64-NEXT:    por %xmm10, %xmm3
-; X64-NEXT:    pcmpeqd %xmm6, %xmm6
-; X64-NEXT:    pand %xmm3, %xmm8
-; X64-NEXT:    pxor %xmm6, %xmm3
-; X64-NEXT:    por %xmm8, %xmm3
-; X64-NEXT:    psrlq $1, %xmm3
+; X64-NEXT:    divq %rsi
+; X64-NEXT:    movq %rax, %rsi
 ; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
 ; X64-NEXT:    movq %xmm2, %rax
-; X64-NEXT:    movd %xmm1, %ecx
+; X64-NEXT:    movd %xmm1, %edi
 ; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm8
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
-; X64-NEXT:    movq %xmm0, %rax
+; X64-NEXT:    divq %rdi
+; X64-NEXT:    movq %rcx, %xmm7
+; X64-NEXT:    movq %rsi, %xmm0
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm0[0]
+; X64-NEXT:    movdqa {{.*#+}} xmm0 = [9223372039002259456,9223372039002259456]
+; X64-NEXT:    movdqa %xmm7, %xmm3
+; X64-NEXT:    pxor %xmm0, %xmm3
+; X64-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; X64-NEXT:    movdqa {{.*#+}} xmm6 = [2147483649,2147483649,2147483649,2147483649]
+; X64-NEXT:    pcmpeqd %xmm6, %xmm4
+; X64-NEXT:    movdqa {{.*#+}} xmm5 = [9223372043297226751,9223372043297226751]
+; X64-NEXT:    movdqa %xmm5, %xmm8
+; X64-NEXT:    pcmpgtd %xmm3, %xmm8
+; X64-NEXT:    pshufd {{.*#+}} xmm9 = xmm8[0,0,2,2]
+; X64-NEXT:    pand %xmm4, %xmm9
+; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm8[1,1,3,3]
+; X64-NEXT:    por %xmm9, %xmm3
+; X64-NEXT:    pcmpeqd %xmm4, %xmm4
+; X64-NEXT:    pand %xmm3, %xmm7
+; X64-NEXT:    pxor %xmm4, %xmm3
+; X64-NEXT:    por %xmm7, %xmm3
+; X64-NEXT:    psrlq $1, %xmm3
+; X64-NEXT:    movq %rax, %xmm7
+; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; X64-NEXT:    movq %xmm2, %rax
 ; X64-NEXT:    psrlq $32, %xmm1
 ; X64-NEXT:    movq %xmm1, %rcx
 ; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    divq %rcx
-; X64-NEXT:    movq %rax, %xmm0
-; X64-NEXT:    punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm0[0]
-; X64-NEXT:    pxor %xmm8, %xmm4
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm4[1,1,3,3]
-; X64-NEXT:    pcmpeqd %xmm7, %xmm0
-; X64-NEXT:    pcmpgtd %xmm4, %xmm5
-; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm5[0,0,2,2]
-; X64-NEXT:    pand %xmm0, %xmm1
+; X64-NEXT:    movq %rax, %xmm1
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm1[0]
+; X64-NEXT:    pxor %xmm7, %xmm0
+; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; X64-NEXT:    pcmpeqd %xmm6, %xmm1
+; X64-NEXT:    pcmpgtd %xmm0, %xmm5
+; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm5[0,0,2,2]
+; X64-NEXT:    pand %xmm1, %xmm2
 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm5[1,1,3,3]
-; X64-NEXT:    por %xmm1, %xmm0
-; X64-NEXT:    pxor %xmm0, %xmm6
-; X64-NEXT:    pand %xmm8, %xmm0
-; X64-NEXT:    por %xmm6, %xmm0
+; X64-NEXT:    por %xmm2, %xmm0
+; X64-NEXT:    pxor %xmm0, %xmm4
+; X64-NEXT:    pand %xmm7, %xmm0
+; X64-NEXT:    por %xmm4, %xmm0
 ; X64-NEXT:    psrlq $1, %xmm0
 ; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
 ; X64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/usub_sat_plus.ll b/llvm/test/CodeGen/X86/usub_sat_plus.ll
index 0fb14ad5cf7b0..725de4401cb87 100644
--- a/llvm/test/CodeGen/X86/usub_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/usub_sat_plus.ll
@@ -82,9 +82,9 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y, i16 zeroext %z) nounw
 define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind {
 ; X86-LABEL: func8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    subb %al, %cl
 ; X86-NEXT:    movzbl %cl, %eax
@@ -111,9 +111,9 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind {
 define zeroext i4 @func4(i4 zeroext %x, i4 zeroext %y, i4 zeroext %z) nounwind {
 ; X86-LABEL: func4:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    mulb {{[0-9]+}}(%esp)
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    andb $15, %al
 ; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    subb %al, %cl
diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll
index a076d0d762aa3..657547800075c 100644
--- a/llvm/test/CodeGen/X86/xmulo.ll
+++ b/llvm/test/CodeGen/X86/xmulo.ll
@@ -90,9 +90,9 @@ define zeroext i1 @smuloi8(i8 %v1, i8 %v2, ptr %res) {
 ;
 ; WIN32-LABEL: smuloi8:
 ; WIN32:       # %bb.0:
-; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; WIN32-NEXT:    imulb {{[0-9]+}}(%esp)
+; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
 ; WIN32-NEXT:    movl %ecx, %eax
@@ -303,9 +303,9 @@ define zeroext i1 @umuloi8(i8 %v1, i8 %v2, ptr %res) {
 ;
 ; WIN32-LABEL: umuloi8:
 ; WIN32:       # %bb.0:
-; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; WIN32-NEXT:    mulb {{[0-9]+}}(%esp)
+; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
 ; WIN32-NEXT:    movl %ecx, %eax
@@ -1386,10 +1386,10 @@ define zeroext i1 @smuloi8_load(ptr %ptr1, i8 %v2, ptr %res) {
 ;
 ; WIN32-LABEL: smuloi8_load:
 ; WIN32:       # %bb.0:
-; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; WIN32-NEXT:    movzbl (%eax), %eax
 ; WIN32-NEXT:    imulb {{[0-9]+}}(%esp)
+; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
 ; WIN32-NEXT:    movl %ecx, %eax
@@ -1435,10 +1435,10 @@ define zeroext i1 @smuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) {
 ;
 ; WIN32-LABEL: smuloi8_load2:
 ; WIN32:       # %bb.0:
-; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; WIN32-NEXT:    imulb (%ecx)
+; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
 ; WIN32-NEXT:    movl %ecx, %eax
@@ -1829,10 +1829,10 @@ define zeroext i1 @umuloi8_load(ptr %ptr1, i8 %v2, ptr %res) {
 ;
 ; WIN32-LABEL: umuloi8_load:
 ; WIN32:       # %bb.0:
-; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; WIN32-NEXT:    movzbl (%eax), %eax
 ; WIN32-NEXT:    mulb {{[0-9]+}}(%esp)
+; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
 ; WIN32-NEXT:    movl %ecx, %eax
@@ -1878,10 +1878,10 @@ define zeroext i1 @umuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) {
 ;
 ; WIN32-LABEL: umuloi8_load2:
 ; WIN32:       # %bb.0:
-; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; WIN32-NEXT:    mulb (%ecx)
+; WIN32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; WIN32-NEXT:    seto %cl
 ; WIN32-NEXT:    movb %al, (%edx)
 ; WIN32-NEXT:    movl %ecx, %eax

>From 8bc6d014eded4c6603c7303690e4aefbcc9ac56e Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Thu, 4 Sep 2025 13:28:04 +0800
Subject: [PATCH 2/2] update testcases

---
 .../CodeGen/MIR/X86/dead-register-pr76416.mir | 19 +++++
 ...obber-subreg.ll => inline-asm-pr155807.ll} |  9 ++-
 llvm/test/CodeGen/X86/inline-asm-pr76416.ll   | 74 +++++++++++++++++++
 3 files changed, 99 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/MIR/X86/dead-register-pr76416.mir
 rename llvm/test/CodeGen/X86/{inline-asm-clobber-subreg.ll => inline-asm-pr155807.ll} (63%)
 create mode 100644 llvm/test/CodeGen/X86/inline-asm-pr76416.ll

diff --git a/llvm/test/CodeGen/MIR/X86/dead-register-pr76416.mir b/llvm/test/CodeGen/MIR/X86/dead-register-pr76416.mir
new file mode 100644
index 0000000000000..340d8d6485038
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/dead-register-pr76416.mir
@@ -0,0 +1,19 @@
+# RUN: llc -mtriple=x86_64-unknown-unknown -run-pass=machine-scheduler -verify-misched -o - %s
+
+---
+name:            vga_load_state
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $rdi
+
+    %0:gr64 = COPY $rdi
+    undef %1.sub_32bit:gr64_nosp = MOV32r0 implicit-def dead $eflags
+    dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax
+    INLINEASM &"", 1 /* sideeffect attdialect */
+    dead $eax = MOV32r0 implicit-def dead $eflags
+    INLINEASM &"", 1 /* sideeffect attdialect */
+    dead $eax = MOV32r0 implicit-def dead $eflags
+    dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax
+
+...
diff --git a/llvm/test/CodeGen/X86/inline-asm-clobber-subreg.ll b/llvm/test/CodeGen/X86/inline-asm-pr155807.ll
similarity index 63%
rename from llvm/test/CodeGen/X86/inline-asm-clobber-subreg.ll
rename to llvm/test/CodeGen/X86/inline-asm-pr155807.ll
index 638a8eeb2d472..3c10773dbb66c 100644
--- a/llvm/test/CodeGen/X86/inline-asm-clobber-subreg.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-pr155807.ll
@@ -1,8 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=x86_64 < %s 2>&1 | FileCheck %s
+; RUN: llc -mtriple=x86_64 < %s | FileCheck %s
 
-define i64 @foo(i64 %vecext) {
-; CHECK-LABEL: foo:
+; LiveVariable Analysis transforms (inlineasm rax, eax) to (inlineasm dead rax, eax)
+; instead of (inlineasm dead rax, dead eax). RegisterPressure fails to consider
+; eax as a dead register.
+define i64 @pr155807(i64 %vecext) {
+; CHECK-LABEL: pr155807:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
diff --git a/llvm/test/CodeGen/X86/inline-asm-pr76416.ll b/llvm/test/CodeGen/X86/inline-asm-pr76416.ll
new file mode 100644
index 0000000000000..16b27de394bfa
--- /dev/null
+++ b/llvm/test/CodeGen/X86/inline-asm-pr76416.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -O2 -mtriple=x86_64 < %s | FileCheck %s
+%struct.anon.5.28.78.99.149.119 = type { [4 x i8] }
+
+ at vga_load_state_p = external dso_local global ptr, align 8
+ at vga_load_state_data = external dso_local global i8, align 1
+
+define dso_local void @vga_load_state() #0 {
+; CHECK-LABEL: vga_load_state:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    cmpl $3, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    jg .LBB0_3
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    incl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    cmpl $3, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    jle .LBB0_2
+; CHECK-NEXT:  .LBB0_3: # %for.end
+; CHECK-NEXT:    movl $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_4: # %for.cond1
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movq vga_load_state_p(%rip), %rax
+; CHECK-NEXT:    movslq -{{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT:    movzbl (%rax,%rcx), %eax
+; CHECK-NEXT:    movb %al, vga_load_state_data(%rip)
+; CHECK-NEXT:    leal 1(%rcx), %eax
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    jmp .LBB0_4
+entry:
+  %i = alloca i32, align 4
+  store i32 0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %0 = load i32, ptr %i, align 4
+  %cmp = icmp slt i32 %0, 4
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  call void asm sideeffect "", "{ax},~{dirflag},~{fpsr},~{flags}"(i8 0) #2
+  %1 = load i32, ptr %i, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, ptr %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  store i32 0, ptr %i, align 4
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.cond1, %for.end
+  call void asm sideeffect "", "N{dx},~{dirflag},~{fpsr},~{flags}"(i32 poison) #2
+  %2 = load ptr, ptr @vga_load_state_p, align 8
+  %regs = getelementptr inbounds %struct.anon.5.28.78.99.149.119, ptr %2, i32 0, i32 0
+  %3 = load i32, ptr %i, align 4
+  %idxprom = sext i32 %3 to i64
+  %arrayidx = getelementptr inbounds [4 x i8], ptr %regs, i64 0, i64 %idxprom
+  %4 = load i8, ptr %arrayidx, align 1
+  store i8 %4, ptr @vga_load_state_data, align 1
+  %5 = load i32, ptr %i, align 4
+  %inc5 = add nsw i32 %5, 1
+  store i32 %inc5, ptr %i, align 4
+  br label %for.cond1, !llvm.loop !1
+}
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.mustprogress"}



More information about the llvm-commits mailing list