[llvm] [RegisterPressure] Remove dead defs correctly (PR #156576)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 3 22:30:16 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Hongyu Chen (XChy)
Changes:
Fixes https://github.com/llvm/llvm-project/issues/155807.
IIUC, we should remove the dead registers from `RegOpers.Defs`, instead of from `RegOpers.DeadDefs`, when collecting operands.
In the test case, LiveVariables analysis transforms `inlineasm rax, eax` into `inlineasm dead rax, eax` rather than `inlineasm dead rax, dead eax`. But `eax` is in fact dead too, since its super-register `rax` is dead. RegisterOperandsCollector nevertheless still treats `eax` as a live register, so we should remove `eax` from `Defs` instead of removing it from `DeadDefs`.
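For illustration, here is a minimal, self-contained C++ sketch of the register-unit overlap (the unit numbers and the `removeUnits` helper are hypothetical simplifications, standing in for LLVM's `VRegMaskOrUnit` lists and `removeRegLanes`):

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

// Hypothetical register units: on x86-64 the units defined by $eax are a
// subset of the units defined by its super-register $rax.
using RegUnits = std::vector<int>;

// Simplified stand-in for removeRegLanes: drop every unit of ToRemove
// that also occurs in From.
static void removeUnits(RegUnits &From, const RegUnits &ToRemove) {
  std::erase_if(From, [&](int U) {
    return std::find(ToRemove.begin(), ToRemove.end(), U) != ToRemove.end();
  });
}

int main() {
  // "INLINEASM dead $rax, $eax": $eax carries no dead flag, so the
  // collector puts its units into Defs, while the dead def of $rax puts
  // the covering units into DeadDefs.
  RegUnits Defs = {0, 1};        // units of $eax
  RegUnits DeadDefs = {0, 1, 2}; // units of $rax, which cover $eax

  // Old cleanup removed Defs' units from DeadDefs, leaving units 0 and 1
  // in Defs as if they were live. The fixed cleanup goes the other way:
  removeUnits(Defs, DeadDefs);

  // All of $eax's units are covered by a dead def, so nothing live remains.
  printf("live def units left: %zu\n", Defs.size()); // prints 0
}
```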
It was quite painful to debug and to figure out how the assertion gets triggered; I am sorry I cannot explain the entire crash process.
---
Patch is 28.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156576.diff
11 Files Affected:
- (modified) llvm/lib/CodeGen/RegisterPressure.cpp (+4-4)
- (added) llvm/test/CodeGen/MIR/X86/dead-register-pr76416.mir (+19)
- (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll (+79-67)
- (added) llvm/test/CodeGen/X86/inline-asm-pr155807.ll (+21)
- (added) llvm/test/CodeGen/X86/inline-asm-pr76416.ll (+74)
- (modified) llvm/test/CodeGen/X86/scalar_widen_div.ll (+5-5)
- (modified) llvm/test/CodeGen/X86/ssub_sat_plus.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/udiv_fix.ll (+17-15)
- (modified) llvm/test/CodeGen/X86/udiv_fix_sat.ll (+42-40)
- (modified) llvm/test/CodeGen/X86/usub_sat_plus.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/xmulo.ll (+6-6)
``````````diff
diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp
index 5f3789050b813..e43e459f4f247 100644
--- a/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -477,8 +477,8 @@ class RegisterOperandsCollector {
collectOperand(*OperI);
// Remove redundant physreg dead defs.
- for (const VRegMaskOrUnit &P : RegOpers.Defs)
- removeRegLanes(RegOpers.DeadDefs, P);
+ for (const VRegMaskOrUnit &P : RegOpers.DeadDefs)
+ removeRegLanes(RegOpers.Defs, P);
}
void collectInstrLanes(const MachineInstr &MI) const {
@@ -486,8 +486,8 @@ class RegisterOperandsCollector {
collectOperandLanes(*OperI);
// Remove redundant physreg dead defs.
- for (const VRegMaskOrUnit &P : RegOpers.Defs)
- removeRegLanes(RegOpers.DeadDefs, P);
+ for (const VRegMaskOrUnit &P : RegOpers.DeadDefs)
+ removeRegLanes(RegOpers.Defs, P);
}
/// Push this operand's register onto the correct vectors.
diff --git a/llvm/test/CodeGen/MIR/X86/dead-register-pr76416.mir b/llvm/test/CodeGen/MIR/X86/dead-register-pr76416.mir
new file mode 100644
index 0000000000000..340d8d6485038
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/dead-register-pr76416.mir
@@ -0,0 +1,19 @@
+# RUN: llc -mtriple=x86_64-unknown-unknown -run-pass=machine-scheduler -verify-misched -o - %s
+
+---
+name: vga_load_state
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $rdi
+
+ %0:gr64 = COPY $rdi
+ undef %1.sub_32bit:gr64_nosp = MOV32r0 implicit-def dead $eflags
+ dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax
+ INLINEASM &"", 1 /* sideeffect attdialect */
+ dead $eax = MOV32r0 implicit-def dead $eflags
+ INLINEASM &"", 1 /* sideeffect attdialect */
+ dead $eax = MOV32r0 implicit-def dead $eflags
+ dead $eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax
+
+...
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index 859e9244d29d2..5c8784cf19338 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -776,145 +776,157 @@ define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y, ptr %divdst) nounwi
define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y, ptr %divdst) nounwind {
; X86-LABEL: vector_i128_i16:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: pushl %eax
; X86-NEXT: pextrw $7, %xmm0, %eax
-; X86-NEXT: pextrw $7, %xmm1, %esi
+; X86-NEXT: pextrw $7, %xmm1, %ecx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: divw %si
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: movd %eax, %xmm2
+; X86-NEXT: divw %cx
+; X86-NEXT: movl %eax, %esi
; X86-NEXT: pextrw $6, %xmm0, %eax
-; X86-NEXT: pextrw $6, %xmm1, %esi
+; X86-NEXT: pextrw $6, %xmm1, %ecx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: divw %si
+; X86-NEXT: divw %cx
; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: movd %eax, %xmm3
-; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: pextrw $5, %xmm0, %eax
-; X86-NEXT: pextrw $5, %xmm1, %esi
+; X86-NEXT: pextrw $5, %xmm1, %ecx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: divw %si
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: movd %eax, %xmm4
+; X86-NEXT: divw %cx
+; X86-NEXT: movl %eax, %edi
; X86-NEXT: pextrw $4, %xmm0, %eax
-; X86-NEXT: pextrw $4, %xmm1, %esi
+; X86-NEXT: pextrw $4, %xmm1, %ecx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: divw %si
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: movd %eax, %xmm2
-; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X86-NEXT: divw %cx
+; X86-NEXT: movl %eax, %ebx
; X86-NEXT: pextrw $3, %xmm0, %eax
-; X86-NEXT: pextrw $3, %xmm1, %esi
+; X86-NEXT: pextrw $3, %xmm1, %ecx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: divw %si
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: movd %eax, %xmm4
+; X86-NEXT: divw %cx
+; X86-NEXT: movl %eax, %ebp
; X86-NEXT: pextrw $2, %xmm0, %eax
-; X86-NEXT: pextrw $2, %xmm1, %esi
+; X86-NEXT: pextrw $2, %xmm1, %ecx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: divw %si
-; X86-NEXT: # kill: def $ax killed $ax def $eax
-; X86-NEXT: movd %eax, %xmm3
-; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; X86-NEXT: divw %cx
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movd %esi, %xmm3
; X86-NEXT: pextrw $1, %xmm0, %eax
; X86-NEXT: pextrw $1, %xmm1, %esi
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: divw %si
; X86-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NEXT: movd (%esp), %xmm4 # 4-byte Folded Reload
+; X86-NEXT: # xmm4 = mem[0],zero,zero,zero
+; X86-NEXT: movd %edi, %xmm5
+; X86-NEXT: movd %ebx, %xmm2
+; X86-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; X86-NEXT: movd %ebp, %xmm6
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3]
+; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; X86-NEXT: movd %ecx, %xmm3
+; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3]
; X86-NEXT: movd %eax, %xmm4
; X86-NEXT: movd %xmm0, %eax
-; X86-NEXT: movd %xmm1, %esi
+; X86-NEXT: movd %xmm1, %ecx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: divw %si
+; X86-NEXT: divw %cx
; X86-NEXT: # kill: def $ax killed $ax def $eax
; X86-NEXT: movd %eax, %xmm5
; X86-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; X86-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
; X86-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0]
-; X86-NEXT: movdqa %xmm5, (%ecx)
+; X86-NEXT: movdqa %xmm5, (%esi)
; X86-NEXT: pmullw %xmm1, %xmm5
; X86-NEXT: psubw %xmm5, %xmm0
+; X86-NEXT: addl $4, %esp
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
; X64-LABEL: vector_i128_i16:
; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
; X64-NEXT: pextrw $7, %xmm0, %eax
; X64-NEXT: pextrw $7, %xmm1, %ecx
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divw %cx
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: movd %eax, %xmm2
+; X64-NEXT: movl %eax, %ecx
; X64-NEXT: pextrw $6, %xmm0, %eax
-; X64-NEXT: pextrw $6, %xmm1, %ecx
+; X64-NEXT: pextrw $6, %xmm1, %esi
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divw %cx
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: movd %eax, %xmm3
-; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT: divw %si
+; X64-NEXT: movl %eax, %esi
; X64-NEXT: pextrw $5, %xmm0, %eax
-; X64-NEXT: pextrw $5, %xmm1, %ecx
+; X64-NEXT: pextrw $5, %xmm1, %r8d
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divw %cx
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: movd %eax, %xmm4
+; X64-NEXT: divw %r8w
+; X64-NEXT: movl %eax, %r8d
; X64-NEXT: pextrw $4, %xmm0, %eax
-; X64-NEXT: pextrw $4, %xmm1, %ecx
+; X64-NEXT: pextrw $4, %xmm1, %r9d
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divw %cx
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: movd %eax, %xmm2
-; X64-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
-; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X64-NEXT: divw %r9w
+; X64-NEXT: movl %eax, %r9d
; X64-NEXT: pextrw $3, %xmm0, %eax
-; X64-NEXT: pextrw $3, %xmm1, %ecx
+; X64-NEXT: pextrw $3, %xmm1, %r10d
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divw %cx
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: movd %eax, %xmm3
+; X64-NEXT: divw %r10w
+; X64-NEXT: movl %eax, %r10d
; X64-NEXT: pextrw $2, %xmm0, %eax
-; X64-NEXT: pextrw $2, %xmm1, %ecx
+; X64-NEXT: pextrw $2, %xmm1, %r11d
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divw %cx
-; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: movd %eax, %xmm4
-; X64-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; X64-NEXT: divw %r11w
+; X64-NEXT: movl %eax, %r11d
; X64-NEXT: pextrw $1, %xmm0, %eax
-; X64-NEXT: pextrw $1, %xmm1, %ecx
+; X64-NEXT: pextrw $1, %xmm1, %ebx
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divw %cx
+; X64-NEXT: divw %bx
; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: movd %eax, %xmm3
+; X64-NEXT: movd %ecx, %xmm2
+; X64-NEXT: movd %esi, %xmm3
+; X64-NEXT: movd %r8d, %xmm4
+; X64-NEXT: movd %r9d, %xmm5
+; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT: movd %r10d, %xmm2
+; X64-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; X64-NEXT: movd %r11d, %xmm3
+; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT: movd %eax, %xmm2
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: movd %xmm1, %ecx
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divw %cx
; X64-NEXT: # kill: def $ax killed $ax def $eax
-; X64-NEXT: movd %eax, %xmm5
-; X64-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
-; X64-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
-; X64-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm2[0]
-; X64-NEXT: movdqa %xmm5, (%rdi)
-; X64-NEXT: pmullw %xmm1, %xmm5
-; X64-NEXT: psubw %xmm5, %xmm0
+; X64-NEXT: movd %eax, %xmm4
+; X64-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; X64-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; X64-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
+; X64-NEXT: movdqa %xmm4, (%rdi)
+; X64-NEXT: pmullw %xmm1, %xmm4
+; X64-NEXT: psubw %xmm4, %xmm0
+; X64-NEXT: popq %rbx
; X64-NEXT: retq
%div = udiv <8 x i16> %x, %y
store <8 x i16> %div, ptr %divdst, align 16
diff --git a/llvm/test/CodeGen/X86/inline-asm-pr155807.ll b/llvm/test/CodeGen/X86/inline-asm-pr155807.ll
new file mode 100644
index 0000000000000..3c10773dbb66c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/inline-asm-pr155807.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64 < %s | FileCheck %s
+
+; LiveVariables analysis transforms (inlineasm rax, eax) into (inlineasm dead rax, eax)
+; instead of (inlineasm dead rax, dead eax). RegisterPressure fails to treat
+; eax as a dead register.
+define i64 @pr155807(i64 %vecext) {
+; CHECK-LABEL: pr155807:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: movabsq $705425148255374, %rax # imm = 0x2819497609C8E
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: idivq %rdi
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: retq
+entry:
+ tail call void asm sideeffect "", "~{rax},~{eax},~{dirflag},~{fpsr},~{flags}"()
+ %rem = srem i64 705425148255374, %vecext
+ ret i64 %rem
+}
diff --git a/llvm/test/CodeGen/X86/inline-asm-pr76416.ll b/llvm/test/CodeGen/X86/inline-asm-pr76416.ll
new file mode 100644
index 0000000000000..16b27de394bfa
--- /dev/null
+++ b/llvm/test/CodeGen/X86/inline-asm-pr76416.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -O2 -mtriple=x86_64 < %s | FileCheck %s
+%struct.anon.5.28.78.99.149.119 = type { [4 x i8] }
+
+ at vga_load_state_p = external dso_local global ptr, align 8
+ at vga_load_state_data = external dso_local global i8, align 1
+
+define dso_local void @vga_load_state() #0 {
+; CHECK-LABEL: vga_load_state:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: jg .LBB0_3
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_2: # %for.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: incl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: cmpl $3, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: jle .LBB0_2
+; CHECK-NEXT: .LBB0_3: # %for.end
+; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_4: # %for.cond1
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: movq vga_load_state_p(%rip), %rax
+; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT: movzbl (%rax,%rcx), %eax
+; CHECK-NEXT: movb %al, vga_load_state_data(%rip)
+; CHECK-NEXT: leal 1(%rcx), %eax
+; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: jmp .LBB0_4
+entry:
+ %i = alloca i32, align 4
+ store i32 0, ptr %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %0 = load i32, ptr %i, align 4
+ %cmp = icmp slt i32 %0, 4
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ call void asm sideeffect "", "{ax},~{dirflag},~{fpsr},~{flags}"(i8 0) #2
+ %1 = load i32, ptr %i, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, ptr %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ store i32 0, ptr %i, align 4
+ br label %for.cond1
+
+for.cond1: ; preds = %for.cond1, %for.end
+ call void asm sideeffect "", "N{dx},~{dirflag},~{fpsr},~{flags}"(i32 poison) #2
+ %2 = load ptr, ptr @vga_load_state_p, align 8
+ %regs = getelementptr inbounds %struct.anon.5.28.78.99.149.119, ptr %2, i32 0, i32 0
+ %3 = load i32, ptr %i, align 4
+ %idxprom = sext i32 %3 to i64
+ %arrayidx = getelementptr inbounds [4 x i8], ptr %regs, i64 0, i64 %idxprom
+ %4 = load i8, ptr %arrayidx, align 1
+ store i8 %4, ptr @vga_load_state_data, align 1
+ %5 = load i32, ptr %i, align 4
+ %inc5 = add nsw i32 %5, 1
+ store i32 %inc5, ptr %i, align 4
+ br label %for.cond1, !llvm.loop !1
+}
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.mustprogress"}
diff --git a/llvm/test/CodeGen/X86/scalar_widen_div.ll b/llvm/test/CodeGen/X86/scalar_widen_div.ll
index 1d98b4f62069d..d50d8ed7aaf6a 100644
--- a/llvm/test/CodeGen/X86/scalar_widen_div.ll
+++ b/llvm/test/CodeGen/X86/scalar_widen_div.ll
@@ -150,15 +150,15 @@ define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: divw %si
-; CHECK-NEXT: # kill: def $ax killed $ax def $eax
-; CHECK-NEXT: movd %eax, %xmm2
-; CHECK-NEXT: pinsrw $1, %ecx, %xmm2
+; CHECK-NEXT: movl %eax, %esi
; CHECK-NEXT: pextrw $2, %xmm0, %eax
-; CHECK-NEXT: pextrw $2, %xmm1, %ecx
+; CHECK-NEXT: pextrw $2, %xmm1, %edi
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: divw %cx
+; CHECK-NEXT: divw %di
; CHECK-NEXT: # kill: def $ax killed $ax def $eax
+; CHECK-NEXT: movd %esi, %xmm2
+; CHECK-NEXT: pinsrw $1, %ecx, %xmm2
; CHECK-NEXT: pinsrw $2, %eax, %xmm2
; CHECK-NEXT: pextrw $3, %xmm0, %eax
; CHECK-NEXT: pextrw $3, %xmm1, %ecx
diff --git a/llvm/test/CodeGen/X86/ssub_sat_plus.ll b/llvm/test/CodeGen/X86/ssub_sat_plus.ll
index 5baf7a1dac74c..8b96a8050e65e 100644
--- a/llvm/test/CodeGen/X86/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/ssub_sat_plus.ll
@@ -105,9 +105,9 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw
define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
; X86-LABEL: func8:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mulb {{[0-9]+}}(%esp)
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpb %al, %dl
; X86-NEXT: setns %cl
@@ -140,9 +140,9 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind {
; X86-LABEL: func4:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: mulb {{[0-9]+}}(%esp)
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: shlb $4, %al
; X86-NEXT: sarb $4, %al
; X86-NEXT: subb %al, %cl
diff --git a/llvm/test/CodeGen/X86/udiv_fix.ll b/llvm/test/CodeGen/X86/udiv_fix.ll
index 82dfeeee13293..f49dec5714ff0 100644
--- a/llvm/test/CodeGen/X86/udiv_fix.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix.ll
@@ -243,28 +243,30 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-NEXT: movdqa %xmm1, %xmm3
; X64-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; X64-NEXT: movq %xmm3, %rcx
-; X64-NEXT: movdqa %xmm0, %xmm4
-; X64-NEXT: punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm2[2],xmm4[3],xmm2[3]
-; X64-NEXT: psllq $31, %xmm4
-; X64-NEXT: movq %xmm4, %rax
+; X64-NEXT: movdqa %xmm0, %xmm3
+; X64-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT: psllq $31, %xmm3
+; X64-NEXT: movq %xmm3, %rax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: divq %rcx
-; X64-NEXT: movq %rax, %xmm3
-; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3]
-; X64-NEXT: movq %xmm4, %rax
-; X64-NEXT: movdqa %xmm1, %xmm4
-; X64-NEXT: psrldq {{.*#+}} xmm4 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X64-NEXT: movq %xmm4, %rcx
+; X64-NEXT: movq %rax, %rcx
+; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
+; X64-NEXT: movq %xmm3, %rax
+; X64-NEXT: movdqa %xmm1, %xmm3
+; X64-NEXT: psrldq {{.*#+}} xmm3 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT: movq %xmm3, %rsi
; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divq %rcx
-; X64-NEXT: movq %rax, %xmm4
-; X64-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; X64-NEXT: divq %rsi
+; X64-NEXT: movq %rax, %rsi
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-NEXT: psllq $31, %xmm0
; X64-NEXT: movq %xmm0, %rax
-; X64-NEXT: movd %xmm1, %ecx
+; X64-NEXT: movd %xmm1, %edi
; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divq %rcx
+; X64-NEXT: divq %rdi
+; X64-NEXT: movq %rcx, %xmm3
+; X64-NEXT: movq %rsi, %xmm2
+; X64-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
; X64-NEXT: ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/156576
More information about the llvm-commits mailing list