[llvm] [X86] Resolve FIXME: Create cld only when needed (PR #82415)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 22 10:43:19 PST 2024
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/82415
>From d2326d20a5e66b59b0cbaf004b65a9457c428c3e Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Tue, 20 Feb 2024 15:22:01 -0500
Subject: [PATCH] [X86] Resolve FIXME: Create cld only when needed
Only use cld when we also have rep instructions or are calling a function.
---
llvm/lib/Target/X86/X86FrameLowering.cpp | 63 ++++++++++++++++++--
llvm/test/CodeGen/X86/x86-32-intrcc.ll | 10 ----
llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll | 8 ---
llvm/test/CodeGen/X86/x86-64-intrcc.ll | 3 -
4 files changed, 59 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index be416fb0db0695..42fd1176a74a22 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -1418,6 +1418,34 @@ bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
return !isWin64Prologue(MF) && MF.needsFrameMoves();
}
+/// Return true if an opcode is part of the REP group of instructions
+static bool isOpcodeRep(unsigned Opcode) {
+ switch (Opcode) {
+ case X86::REPNE_PREFIX:
+ case X86::REP_MOVSB_32:
+ case X86::REP_MOVSB_64:
+ case X86::REP_MOVSD_32:
+ case X86::REP_MOVSD_64:
+ case X86::REP_MOVSQ_32:
+ case X86::REP_MOVSQ_64:
+ case X86::REP_MOVSW_32:
+ case X86::REP_MOVSW_64:
+ case X86::REP_PREFIX:
+ case X86::REP_STOSB_32:
+ case X86::REP_STOSB_64:
+ case X86::REP_STOSD_32:
+ case X86::REP_STOSD_64:
+ case X86::REP_STOSQ_32:
+ case X86::REP_STOSQ_64:
+ case X86::REP_STOSW_32:
+ case X86::REP_STOSW_64:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -2194,13 +2222,40 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
// in each prologue of interrupt handler function.
//
- // FIXME: Create "cld" instruction only in these cases:
+ // Create "cld" instruction only in these cases:
// 1. The interrupt handling function uses any of the "rep" instructions.
// 2. Interrupt handling function calls another function.
//
- if (Fn.getCallingConv() == CallingConv::X86_INTR)
- BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
- .setMIFlag(MachineInstr::FrameSetup);
+ if (Fn.getCallingConv() == CallingConv::X86_INTR) {
+ bool NeedsCLD = false;
+
+ for (const MachineBasicBlock &B : MF) {
+ for (const MachineInstr &MI : B) {
+ if (MI.isCall()) {
+ NeedsCLD = true;
+ break;
+ }
+
+ if (isOpcodeRep(MI.getOpcode())) {
+ NeedsCLD = true;
+ break;
+ }
+
+ if (MI.isInlineAsm()) {
+ // TODO: Parse asm for rep instructions or call sites?
+ // For now, let's play it safe and emit a cld instruction
+ // just in case.
+ NeedsCLD = true;
+ break;
+ }
+ }
+ }
+
+ if (NeedsCLD) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
// At this point we know if the function has WinCFI or not.
MF.setHasWinCFI(HasWinCFI);
diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
index 2e482753e26853..3c3944c2082bd5 100644
--- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
@@ -149,7 +149,6 @@ define x86_intrcc void @test_isr_x87(ptr byval(%struct.interrupt_frame) %frame)
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: andl $-16, %esp
-; CHECK-NEXT: cld
; CHECK-NEXT: fldt f80
; CHECK-NEXT: fld1
; CHECK-NEXT: faddp %st, %st(1)
@@ -163,7 +162,6 @@ define x86_intrcc void @test_isr_x87(ptr byval(%struct.interrupt_frame) %frame)
; CHECK0-NEXT: pushl %ebp
; CHECK0-NEXT: movl %esp, %ebp
; CHECK0-NEXT: andl $-16, %esp
-; CHECK0-NEXT: cld
; CHECK0-NEXT: fldt f80
; CHECK0-NEXT: fld1
; CHECK0-NEXT: faddp %st, %st(1)
@@ -188,7 +186,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: andl $-16, %esp
-; CHECK-NEXT: cld
; CHECK-NEXT: leal 20(%ebp), %eax
; CHECK-NEXT: leal 4(%ebp), %ecx
; CHECK-NEXT: movl %ecx, sink_address
@@ -206,7 +203,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
; CHECK0-NEXT: pushl %ecx
; CHECK0-NEXT: pushl %eax
; CHECK0-NEXT: andl $-16, %esp
-; CHECK0-NEXT: cld
; CHECK0-NEXT: leal 4(%ebp), %ecx
; CHECK0-NEXT: movl %ecx, %eax
; CHECK0-NEXT: addl $16, %eax
@@ -234,7 +230,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
; CHECK-NEXT: pushl %ecx
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: andl $-16, %esp
-; CHECK-NEXT: cld
; CHECK-NEXT: movl 4(%ebp), %eax
; CHECK-NEXT: leal 24(%ebp), %ecx
; CHECK-NEXT: leal 8(%ebp), %edx
@@ -257,7 +252,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
; CHECK0-NEXT: pushl %ecx
; CHECK0-NEXT: pushl %eax
; CHECK0-NEXT: andl $-16, %esp
-; CHECK0-NEXT: cld
; CHECK0-NEXT: movl 4(%ebp), %eax
; CHECK0-NEXT: leal 8(%ebp), %edx
; CHECK0-NEXT: movl %edx, %ecx
@@ -288,7 +282,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
; CHECK-NEXT: movl %esp, %ebp
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: andl $-16, %esp
-; CHECK-NEXT: cld
; CHECK-NEXT: leal 4(%ebp), %eax
; CHECK-NEXT: movl %eax, sink_address
; CHECK-NEXT: leal -4(%ebp), %esp
@@ -303,7 +296,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
; CHECK0-NEXT: movl %esp, %ebp
; CHECK0-NEXT: pushl %eax
; CHECK0-NEXT: andl $-16, %esp
-; CHECK0-NEXT: cld
; CHECK0-NEXT: movl 4(%ebp), %eax
; CHECK0-NEXT: leal 4(%ebp), %eax
; CHECK0-NEXT: movl %eax, sink_address
@@ -358,7 +350,6 @@ define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %fra
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: andl $-32, %esp
; CHECK-NEXT: subl $32, %esp
-; CHECK-NEXT: cld
; CHECK-NEXT: movl 4(%ebp), %eax
; CHECK-NEXT: movl %eax, (%esp)
; CHECK-NEXT: leal -4(%ebp), %esp
@@ -374,7 +365,6 @@ define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %fra
; CHECK0-NEXT: pushl %eax
; CHECK0-NEXT: andl $-32, %esp
; CHECK0-NEXT: subl $32, %esp
-; CHECK0-NEXT: cld
; CHECK0-NEXT: movl 4(%ebp), %eax
; CHECK0-NEXT: movl %eax, (%esp)
; CHECK0-NEXT: leal -4(%ebp), %esp
diff --git a/llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll b/llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll
index a46b9d9ba5a100..1fe395b84d46c3 100644
--- a/llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll
+++ b/llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll
@@ -21,28 +21,24 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_empty(ptr nocapture byval(%s
; CHECK-USER-LABEL: test_uintr_isr_cc_empty:
; CHECK-USER: # %bb.0: # %entry
; CHECK-USER-NEXT: pushq %rax
-; CHECK-USER-NEXT: cld
; CHECK-USER-NEXT: addq $16, %rsp
; CHECK-USER-NEXT: uiret
;
; CHECK0-USER-LABEL: test_uintr_isr_cc_empty:
; CHECK0-USER: # %bb.0: # %entry
; CHECK0-USER-NEXT: pushq %rax
-; CHECK0-USER-NEXT: cld
; CHECK0-USER-NEXT: addq $16, %rsp
; CHECK0-USER-NEXT: uiret
;
; CHECK-KERNEL-LABEL: test_uintr_isr_cc_empty:
; CHECK-KERNEL: # %bb.0: # %entry
; CHECK-KERNEL-NEXT: pushq %rax
-; CHECK-KERNEL-NEXT: cld
; CHECK-KERNEL-NEXT: addq $16, %rsp
; CHECK-KERNEL-NEXT: iretq
;
; CHECK0-KERNEL-LABEL: test_uintr_isr_cc_empty:
; CHECK0-KERNEL: # %bb.0: # %entry
; CHECK0-KERNEL-NEXT: pushq %rax
-; CHECK0-KERNEL-NEXT: cld
; CHECK0-KERNEL-NEXT: addq $16, %rsp
; CHECK0-KERNEL-NEXT: iretq
entry:
@@ -75,7 +71,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
; CHECK-USER-NEXT: pushq %rax
; CHECK-USER-NEXT: pushq %rdx
; CHECK-USER-NEXT: pushq %rcx
-; CHECK-USER-NEXT: cld
; CHECK-USER-NEXT: movq 32(%rsp), %rax
; CHECK-USER-NEXT: movq 40(%rsp), %rcx
; CHECK-USER-NEXT: movq 48(%rsp), %rdx
@@ -96,7 +91,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
; CHECK0-USER-NEXT: pushq %rax
; CHECK0-USER-NEXT: pushq %rdx
; CHECK0-USER-NEXT: pushq %rcx
-; CHECK0-USER-NEXT: cld
; CHECK0-USER-NEXT: movq 32(%rsp), %rax
; CHECK0-USER-NEXT: leaq 40(%rsp), %rcx
; CHECK0-USER-NEXT: movq (%rcx), %rdx
@@ -118,7 +112,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
; CHECK-KERNEL-NEXT: pushq %rax
; CHECK-KERNEL-NEXT: pushq %rdx
; CHECK-KERNEL-NEXT: pushq %rcx
-; CHECK-KERNEL-NEXT: cld
; CHECK-KERNEL-NEXT: movq 32(%rsp), %rax
; CHECK-KERNEL-NEXT: movq 40(%rsp), %rcx
; CHECK-KERNEL-NEXT: movq 48(%rsp), %rdx
@@ -139,7 +132,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
; CHECK0-KERNEL-NEXT: pushq %rax
; CHECK0-KERNEL-NEXT: pushq %rdx
; CHECK0-KERNEL-NEXT: pushq %rcx
-; CHECK0-KERNEL-NEXT: cld
; CHECK0-KERNEL-NEXT: movq 32(%rsp), %rax
; CHECK0-KERNEL-NEXT: leaq 40(%rsp), %rcx
; CHECK0-KERNEL-NEXT: movq (%rcx), %rdx
diff --git a/llvm/test/CodeGen/X86/x86-64-intrcc.ll b/llvm/test/CodeGen/X86/x86-64-intrcc.ll
index 443d4c2fa46490..5fc606eb566ea6 100644
--- a/llvm/test/CodeGen/X86/x86-64-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-64-intrcc.ll
@@ -114,7 +114,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
- ; CHECK: cld
; CHECK-DAG: leaq 8(%rbp), %[[R1:[^ ]*]]
; CHECK-DAG: leaq 40(%rbp), %[[R2:[^ ]*]]
; CHECK: movq %[[R1]], sink_address
@@ -136,7 +135,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
- ; CHECK: cld
; CHECK-DAG: movq 16(%rbp), %[[R3:[^ ]*]]
; CHECK-DAG: leaq 24(%rbp), %[[R1:[^ ]*]]
; CHECK-DAG: leaq 56(%rbp), %[[R2:[^ ]*]]
@@ -164,7 +162,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
- ; CHECK: cld
; CHECK: leaq 16(%rbp), %[[R1:[^ ]*]]
; CHECK: movq %[[R1]], sink_address(%rip)
entry:
More information about the llvm-commits
mailing list