[llvm] [X86] Resolve FIXME: Create cld only when needed (PR #82415)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 26 07:01:51 PST 2024


https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/82415

>From 1fbf24fd0436f3d6d06cda073feafd3b0200c98f Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Tue, 20 Feb 2024 15:22:01 -0500
Subject: [PATCH] [X86] Resolve FIXME: Create cld only when needed

Only use cld when we also have rep instructions, are calling a function or contain inline asm.
---
 llvm/lib/Target/X86/X86FrameLowering.cpp     | 67 ++++++++++++++++++--
 llvm/test/CodeGen/X86/x86-32-intrcc.ll       | 10 ---
 llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll |  8 ---
 llvm/test/CodeGen/X86/x86-64-intrcc.ll       |  3 -
 4 files changed, 63 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index be416fb0db0695..d914e1b61ab075 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -1418,6 +1418,34 @@ bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
   return !isWin64Prologue(MF) && MF.needsFrameMoves();
 }
 
+/// Return true if an opcode is part of the REP group of instructions
+static bool isOpcodeRep(unsigned Opcode) {
+  switch (Opcode) {
+  case X86::REPNE_PREFIX:
+  case X86::REP_MOVSB_32:
+  case X86::REP_MOVSB_64:
+  case X86::REP_MOVSD_32:
+  case X86::REP_MOVSD_64:
+  case X86::REP_MOVSQ_32:
+  case X86::REP_MOVSQ_64:
+  case X86::REP_MOVSW_32:
+  case X86::REP_MOVSW_64:
+  case X86::REP_PREFIX:
+  case X86::REP_STOSB_32:
+  case X86::REP_STOSB_64:
+  case X86::REP_STOSD_32:
+  case X86::REP_STOSD_64:
+  case X86::REP_STOSQ_32:
+  case X86::REP_STOSQ_64:
+  case X86::REP_STOSW_32:
+  case X86::REP_STOSW_64:
+    return true;
+  default:
+    break;
+  }
+  return false;
+}
+
 /// emitPrologue - Push callee-saved registers onto the stack, which
 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate
 /// space for local variables. Also emit labels used by the exception handler to
@@ -2194,13 +2222,44 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
   // in each prologue of interrupt handler function.
   //
-  // FIXME: Create "cld" instruction only in these cases:
+  // Create "cld" instruction only in these cases:
   // 1. The interrupt handling function uses any of the "rep" instructions.
   // 2. Interrupt handling function calls another function.
+  // 3. If there are any inline asm blocks, as we do not know what they do
   //
-  if (Fn.getCallingConv() == CallingConv::X86_INTR)
-    BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
-        .setMIFlag(MachineInstr::FrameSetup);
+  // TODO: We should also emit cld if we detect the use of std, but as of now,
+  // the compiler does not even emit that instruction or even define it, so in
+  // practice, this would only happen with inline asm, which we cover anyway.
+  if (Fn.getCallingConv() == CallingConv::X86_INTR) {
+    bool NeedsCLD = false;
+
+    for (const MachineBasicBlock &B : MF) {
+      for (const MachineInstr &MI : B) {
+        if (MI.isCall()) {
+          NeedsCLD = true;
+          break;
+        }
+
+        if (isOpcodeRep(MI.getOpcode())) {
+          NeedsCLD = true;
+          break;
+        }
+
+        if (MI.isInlineAsm()) {
+          // TODO: Parse asm for rep instructions or call sites?
+          // For now, let's play it safe and emit a cld instruction
+          // just in case.
+          NeedsCLD = true;
+          break;
+        }
+      }
+    }
+
+    if (NeedsCLD) {
+      BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+  }
 
   // At this point we know if the function has WinCFI or not.
   MF.setHasWinCFI(HasWinCFI);
diff --git a/llvm/test/CodeGen/X86/x86-32-intrcc.ll b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
index 2e482753e26853..3c3944c2082bd5 100644
--- a/llvm/test/CodeGen/X86/x86-32-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-32-intrcc.ll
@@ -149,7 +149,6 @@ define x86_intrcc void @test_isr_x87(ptr byval(%struct.interrupt_frame) %frame)
 ; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    movl %esp, %ebp
 ; CHECK-NEXT:    andl $-16, %esp
-; CHECK-NEXT:    cld
 ; CHECK-NEXT:    fldt f80
 ; CHECK-NEXT:    fld1
 ; CHECK-NEXT:    faddp %st, %st(1)
@@ -163,7 +162,6 @@ define x86_intrcc void @test_isr_x87(ptr byval(%struct.interrupt_frame) %frame)
 ; CHECK0-NEXT:    pushl %ebp
 ; CHECK0-NEXT:    movl %esp, %ebp
 ; CHECK0-NEXT:    andl $-16, %esp
-; CHECK0-NEXT:    cld
 ; CHECK0-NEXT:    fldt f80
 ; CHECK0-NEXT:    fld1
 ; CHECK0-NEXT:    faddp %st, %st(1)
@@ -188,7 +186,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
 ; CHECK-NEXT:    pushl %ecx
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    andl $-16, %esp
-; CHECK-NEXT:    cld
 ; CHECK-NEXT:    leal 20(%ebp), %eax
 ; CHECK-NEXT:    leal 4(%ebp), %ecx
 ; CHECK-NEXT:    movl %ecx, sink_address
@@ -206,7 +203,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
 ; CHECK0-NEXT:    pushl %ecx
 ; CHECK0-NEXT:    pushl %eax
 ; CHECK0-NEXT:    andl $-16, %esp
-; CHECK0-NEXT:    cld
 ; CHECK0-NEXT:    leal 4(%ebp), %ecx
 ; CHECK0-NEXT:    movl %ecx, %eax
 ; CHECK0-NEXT:    addl $16, %eax
@@ -234,7 +230,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
 ; CHECK-NEXT:    pushl %ecx
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    andl $-16, %esp
-; CHECK-NEXT:    cld
 ; CHECK-NEXT:    movl 4(%ebp), %eax
 ; CHECK-NEXT:    leal 24(%ebp), %ecx
 ; CHECK-NEXT:    leal 8(%ebp), %edx
@@ -257,7 +252,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
 ; CHECK0-NEXT:    pushl %ecx
 ; CHECK0-NEXT:    pushl %eax
 ; CHECK0-NEXT:    andl $-16, %esp
-; CHECK0-NEXT:    cld
 ; CHECK0-NEXT:    movl 4(%ebp), %eax
 ; CHECK0-NEXT:    leal 8(%ebp), %edx
 ; CHECK0-NEXT:    movl %edx, %ecx
@@ -288,7 +282,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
 ; CHECK-NEXT:    movl %esp, %ebp
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    andl $-16, %esp
-; CHECK-NEXT:    cld
 ; CHECK-NEXT:    leal 4(%ebp), %eax
 ; CHECK-NEXT:    movl %eax, sink_address
 ; CHECK-NEXT:    leal -4(%ebp), %esp
@@ -303,7 +296,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
 ; CHECK0-NEXT:    movl %esp, %ebp
 ; CHECK0-NEXT:    pushl %eax
 ; CHECK0-NEXT:    andl $-16, %esp
-; CHECK0-NEXT:    cld
 ; CHECK0-NEXT:    movl 4(%ebp), %eax
 ; CHECK0-NEXT:    leal 4(%ebp), %eax
 ; CHECK0-NEXT:    movl %eax, sink_address
@@ -358,7 +350,6 @@ define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %fra
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    andl $-32, %esp
 ; CHECK-NEXT:    subl $32, %esp
-; CHECK-NEXT:    cld
 ; CHECK-NEXT:    movl 4(%ebp), %eax
 ; CHECK-NEXT:    movl %eax, (%esp)
 ; CHECK-NEXT:    leal -4(%ebp), %esp
@@ -374,7 +365,6 @@ define x86_intrcc void @test_isr_realign(ptr byval(%struct.interrupt_frame) %fra
 ; CHECK0-NEXT:    pushl %eax
 ; CHECK0-NEXT:    andl $-32, %esp
 ; CHECK0-NEXT:    subl $32, %esp
-; CHECK0-NEXT:    cld
 ; CHECK0-NEXT:    movl 4(%ebp), %eax
 ; CHECK0-NEXT:    movl %eax, (%esp)
 ; CHECK0-NEXT:    leal -4(%ebp), %esp
diff --git a/llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll b/llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll
index a46b9d9ba5a100..1fe395b84d46c3 100644
--- a/llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll
+++ b/llvm/test/CodeGen/X86/x86-64-intrcc-uintr.ll
@@ -21,28 +21,24 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_empty(ptr nocapture byval(%s
 ; CHECK-USER-LABEL: test_uintr_isr_cc_empty:
 ; CHECK-USER:       # %bb.0: # %entry
 ; CHECK-USER-NEXT:    pushq %rax
-; CHECK-USER-NEXT:    cld
 ; CHECK-USER-NEXT:    addq $16, %rsp
 ; CHECK-USER-NEXT:    uiret
 ;
 ; CHECK0-USER-LABEL: test_uintr_isr_cc_empty:
 ; CHECK0-USER:       # %bb.0: # %entry
 ; CHECK0-USER-NEXT:    pushq %rax
-; CHECK0-USER-NEXT:    cld
 ; CHECK0-USER-NEXT:    addq $16, %rsp
 ; CHECK0-USER-NEXT:    uiret
 ;
 ; CHECK-KERNEL-LABEL: test_uintr_isr_cc_empty:
 ; CHECK-KERNEL:       # %bb.0: # %entry
 ; CHECK-KERNEL-NEXT:    pushq %rax
-; CHECK-KERNEL-NEXT:    cld
 ; CHECK-KERNEL-NEXT:    addq $16, %rsp
 ; CHECK-KERNEL-NEXT:    iretq
 ;
 ; CHECK0-KERNEL-LABEL: test_uintr_isr_cc_empty:
 ; CHECK0-KERNEL:       # %bb.0: # %entry
 ; CHECK0-KERNEL-NEXT:    pushq %rax
-; CHECK0-KERNEL-NEXT:    cld
 ; CHECK0-KERNEL-NEXT:    addq $16, %rsp
 ; CHECK0-KERNEL-NEXT:    iretq
 entry:
@@ -75,7 +71,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
 ; CHECK-USER-NEXT:    pushq %rax
 ; CHECK-USER-NEXT:    pushq %rdx
 ; CHECK-USER-NEXT:    pushq %rcx
-; CHECK-USER-NEXT:    cld
 ; CHECK-USER-NEXT:    movq 32(%rsp), %rax
 ; CHECK-USER-NEXT:    movq 40(%rsp), %rcx
 ; CHECK-USER-NEXT:    movq 48(%rsp), %rdx
@@ -96,7 +91,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
 ; CHECK0-USER-NEXT:    pushq %rax
 ; CHECK0-USER-NEXT:    pushq %rdx
 ; CHECK0-USER-NEXT:    pushq %rcx
-; CHECK0-USER-NEXT:    cld
 ; CHECK0-USER-NEXT:    movq 32(%rsp), %rax
 ; CHECK0-USER-NEXT:    leaq 40(%rsp), %rcx
 ; CHECK0-USER-NEXT:    movq (%rcx), %rdx
@@ -118,7 +112,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
 ; CHECK-KERNEL-NEXT:    pushq %rax
 ; CHECK-KERNEL-NEXT:    pushq %rdx
 ; CHECK-KERNEL-NEXT:    pushq %rcx
-; CHECK-KERNEL-NEXT:    cld
 ; CHECK-KERNEL-NEXT:    movq 32(%rsp), %rax
 ; CHECK-KERNEL-NEXT:    movq 40(%rsp), %rcx
 ; CHECK-KERNEL-NEXT:    movq 48(%rsp), %rdx
@@ -139,7 +132,6 @@ define dso_local x86_intrcc void @test_uintr_isr_cc_args(ptr nocapture readonly
 ; CHECK0-KERNEL-NEXT:    pushq %rax
 ; CHECK0-KERNEL-NEXT:    pushq %rdx
 ; CHECK0-KERNEL-NEXT:    pushq %rcx
-; CHECK0-KERNEL-NEXT:    cld
 ; CHECK0-KERNEL-NEXT:    movq 32(%rsp), %rax
 ; CHECK0-KERNEL-NEXT:    leaq 40(%rsp), %rcx
 ; CHECK0-KERNEL-NEXT:    movq (%rcx), %rdx
diff --git a/llvm/test/CodeGen/X86/x86-64-intrcc.ll b/llvm/test/CodeGen/X86/x86-64-intrcc.ll
index 443d4c2fa46490..5fc606eb566ea6 100644
--- a/llvm/test/CodeGen/X86/x86-64-intrcc.ll
+++ b/llvm/test/CodeGen/X86/x86-64-intrcc.ll
@@ -114,7 +114,6 @@ define dso_local x86_intrcc void @test_fp_1(ptr byval(%struct.interrupt_frame) %
   ; CHECK: # %bb.0: # %entry
   ; CHECK-NEXT: pushq %rbp
   ; CHECK-NEXT: movq %rsp, %rbp
-  ; CHECK: cld
   ; CHECK-DAG: leaq 8(%rbp), %[[R1:[^ ]*]]
   ; CHECK-DAG: leaq 40(%rbp), %[[R2:[^ ]*]]
   ; CHECK: movq %[[R1]], sink_address
@@ -136,7 +135,6 @@ define dso_local x86_intrcc void @test_fp_2(ptr byval(%struct.interrupt_frame) %
   ; CHECK-NEXT: pushq %rax
   ; CHECK-NEXT: pushq %rbp
   ; CHECK-NEXT: movq %rsp, %rbp
-  ; CHECK: cld
   ; CHECK-DAG: movq 16(%rbp), %[[R3:[^ ]*]]
   ; CHECK-DAG: leaq 24(%rbp), %[[R1:[^ ]*]]
   ; CHECK-DAG: leaq 56(%rbp), %[[R2:[^ ]*]]
@@ -164,7 +162,6 @@ define x86_intrcc void @test_copy_elide(ptr byval(%struct.interrupt_frame) %fram
   ; CHECK-NEXT: pushq %rax
   ; CHECK-NEXT: pushq %rbp
   ; CHECK-NEXT: movq %rsp, %rbp
-  ; CHECK: cld
   ; CHECK: leaq 16(%rbp), %[[R1:[^ ]*]]
   ; CHECK: movq %[[R1]], sink_address(%rip)
 entry:



More information about the llvm-commits mailing list