[llvm] [X86]RMW instrs not handled in pre-RA-sched=fast (PR #67281)

via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 24 20:39:27 PDT 2023

https://github.com/XinWang10 created https://github.com/llvm/llvm-project/pull/67281


>From b97a85bc1be0543c77c1d9dfbe30ac0e4a0becf1 Mon Sep 17 00:00:00 2001
From: XinWang10 <xin10.wang at intel.com>
Date: Mon, 25 Sep 2023 11:38:40 +0800
Subject: [PATCH] [X86]RMW instrs not handled in pre-RA-sched=fast

 .../CodeGen/SelectionDAG/ScheduleDAGFast.cpp  |  5 ++
 llvm/test/CodeGen/X86/copy-eflags.ll          | 90 +++++++++++++++++++
 2 files changed, 95 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index ab4c33c9e976bc8..3941246cd170446 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -231,6 +231,11 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
     if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
       return nullptr;
+    // Unfolding an x86 RMW operation results in a store, dec, and load, which
+    // can't be handled here, so bail out.
+    if (NewNodes.size() == 3)
+      return nullptr;
     LLVM_DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
     assert(NewNodes.size() == 2 && "Expected a load folding node!");
diff --git a/llvm/test/CodeGen/X86/copy-eflags.ll b/llvm/test/CodeGen/X86/copy-eflags.ll
index 6af80860401afd8..29d500e5964ce40 100644
--- a/llvm/test/CodeGen/X86/copy-eflags.ll
+++ b/llvm/test/CodeGen/X86/copy-eflags.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefix=X32
 ; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefix=X64
+; RUN: llc -o - -mtriple=x86_64-unknown-unknown -pre-RA-sched=fast %s | FileCheck %s --check-prefix=X64FAST
 ; Test patterns that require preserving and restoring flags.
@@ -63,6 +64,29 @@ define dso_local i32 @test1() nounwind {
 ; X64-NEXT:  .LBB0_2: # %if.end
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    retq
+; X64FAST-LABEL: test1:
+; X64FAST:       # %bb.0: # %entry
+; X64FAST-NEXT:    movzbl b(%rip), %ecx
+; X64FAST-NEXT:    leal 1(%rcx), %eax
+; X64FAST-NEXT:    movb %al, b(%rip)
+; X64FAST-NEXT:    incl c(%rip)
+; X64FAST-NEXT:    movzbl a(%rip), %edx
+; X64FAST-NEXT:    sete %sil
+; X64FAST-NEXT:    leal 1(%rdx), %edi
+; X64FAST-NEXT:    cmpb %cl, %dl
+; X64FAST-NEXT:    sete d(%rip)
+; X64FAST-NEXT:    movb %dil, a(%rip)
+; X64FAST-NEXT:    testb %sil, %sil
+; X64FAST-NEXT:    jne .LBB0_2
+; X64FAST-NEXT:  # %bb.1: # %if.then
+; X64FAST-NEXT:    pushq %rax
+; X64FAST-NEXT:    movsbl %al, %edi
+; X64FAST-NEXT:    callq external
+; X64FAST-NEXT:    addq $8, %rsp
+; X64FAST-NEXT:  .LBB0_2: # %if.end
+; X64FAST-NEXT:    xorl %eax, %eax
+; X64FAST-NEXT:    retq
   %bval = load i8, ptr @b
   %inc = add i8 %bval, 1
@@ -127,6 +151,24 @@ define dso_local i32 @test2(ptr %ptr) nounwind {
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    popq %rbx
 ; X64-NEXT:    retq
+; X64FAST-LABEL: test2:
+; X64FAST:       # %bb.0: # %entry
+; X64FAST-NEXT:    pushq %rbx
+; X64FAST-NEXT:    incl (%rdi)
+; X64FAST-NEXT:    setne %bl
+; X64FAST-NEXT:    movl $42, %edi
+; X64FAST-NEXT:    callq external
+; X64FAST-NEXT:    testb %bl, %bl
+; X64FAST-NEXT:    jne .LBB1_2
+; X64FAST-NEXT:  # %bb.1: # %then
+; X64FAST-NEXT:    movl $64, %eax
+; X64FAST-NEXT:    popq %rbx
+; X64FAST-NEXT:    retq
+; X64FAST-NEXT:  .LBB1_2: # %else
+; X64FAST-NEXT:    xorl %eax, %eax
+; X64FAST-NEXT:    popq %rbx
+; X64FAST-NEXT:    retq
   %val = load i32, ptr %ptr
   %inc = add i32 %val, 1
@@ -173,6 +215,17 @@ define dso_local void @test_tail_call(ptr %ptr) nounwind optsize {
 ; X64-NEXT:    jne external_b # TAILCALL
 ; X64-NEXT:  # %bb.1: # %then
 ; X64-NEXT:    jmp external_a # TAILCALL
+; X64FAST-LABEL: test_tail_call:
+; X64FAST:       # %bb.0: # %entry
+; X64FAST-NEXT:    incl (%rdi)
+; X64FAST-NEXT:    setne %al
+; X64FAST-NEXT:    incb a(%rip)
+; X64FAST-NEXT:    sete d(%rip)
+; X64FAST-NEXT:    testb %al, %al
+; X64FAST-NEXT:    jne external_b # TAILCALL
+; X64FAST-NEXT:  # %bb.1: # %then
+; X64FAST-NEXT:    jmp external_a # TAILCALL
   %val = load i32, ptr %ptr
   %inc = add i32 %val, 1
@@ -267,6 +320,29 @@ define dso_local void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, ptr %pt
 ; X64-NEXT:    cltd
 ; X64-NEXT:    idivl %r11d
 ; X64-NEXT:    jmp .LBB3_1
+; X64FAST-LABEL: PR37100:
+; X64FAST:       # %bb.0: # %bb
+; X64FAST-NEXT:    movq %rdx, %r10
+; X64FAST-NEXT:    movl {{[0-9]+}}(%rsp), %esi
+; X64FAST-NEXT:    movzbl %cl, %ecx
+; X64FAST-NEXT:    .p2align 4, 0x90
+; X64FAST-NEXT:  .LBB3_1: # %bb1
+; X64FAST-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64FAST-NEXT:    movsbq %dil, %rax
+; X64FAST-NEXT:    xorl %r11d, %r11d
+; X64FAST-NEXT:    cmpq %rax, %r10
+; X64FAST-NEXT:    setl %r11b
+; X64FAST-NEXT:    negl %r11d
+; X64FAST-NEXT:    cmpq %rax, %r10
+; X64FAST-NEXT:    movzbl %al, %edi
+; X64FAST-NEXT:    cmovgel %ecx, %edi
+; X64FAST-NEXT:    movb %dil, (%r8)
+; X64FAST-NEXT:    cmovgel (%r9), %r11d
+; X64FAST-NEXT:    movl %esi, %eax
+; X64FAST-NEXT:    cltd
+; X64FAST-NEXT:    idivl %r11d
+; X64FAST-NEXT:    jmp .LBB3_1
   br label %bb1
@@ -332,6 +408,20 @@ define dso_local void @PR37431(ptr %arg1, ptr %arg2, ptr %arg3, i32 %arg4, i64 %
 ; X64-NEXT:    idivl %edi
 ; X64-NEXT:    movb %dl, (%rcx)
 ; X64-NEXT:    retq
+; X64FAST-LABEL: PR37431:
+; X64FAST:       # %bb.0: # %entry
+; X64FAST-NEXT:    movl %ecx, %eax
+; X64FAST-NEXT:    movq %rdx, %rcx
+; X64FAST-NEXT:    xorl %r9d, %r9d
+; X64FAST-NEXT:    movslq (%rdi), %rdx
+; X64FAST-NEXT:    cmpq %rdx, %r8
+; X64FAST-NEXT:    sbbl %r9d, %r9d
+; X64FAST-NEXT:    cltd
+; X64FAST-NEXT:    idivl %r9d
+; X64FAST-NEXT:    movb %r9b, (%rsi)
+; X64FAST-NEXT:    movb %dl, (%rcx)
+; X64FAST-NEXT:    retq
   %tmp = load i32, ptr %arg1
   %tmp1 = sext i32 %tmp to i64

More information about the llvm-commits mailing list