[llvm] [X86] RMW instrs not handled in pre-RA-sched=fast (PR #67281)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 24 20:39:27 PDT 2023
https://github.com/XinWang10 created https://github.com/llvm/llvm-project/pull/67281
None
>From b97a85bc1be0543c77c1d9dfbe30ac0e4a0becf1 Mon Sep 17 00:00:00 2001
From: XinWang10 <xin10.wang at intel.com>
Date: Mon, 25 Sep 2023 11:38:40 +0800
Subject: [PATCH] [X86] RMW instrs not handled in pre-RA-sched=fast
---
.../CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 5 ++
llvm/test/CodeGen/X86/copy-eflags.ll | 90 +++++++++++++++++++
2 files changed, 95 insertions(+)
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index ab4c33c9e976bc8..3941246cd170446 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -231,6 +231,11 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return nullptr;
+ // Unfolding an x86 RMW operation results in store, dec, load, which
+ // can't be handled here, so quit.
+ if (NewNodes.size() == 3)
+ return nullptr;
+
LLVM_DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
diff --git a/llvm/test/CodeGen/X86/copy-eflags.ll b/llvm/test/CodeGen/X86/copy-eflags.ll
index 6af80860401afd8..29d500e5964ce40 100644
--- a/llvm/test/CodeGen/X86/copy-eflags.ll
+++ b/llvm/test/CodeGen/X86/copy-eflags.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefix=X32
; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefix=X64
+; RUN: llc -o - -mtriple=x86_64-unknown-unknown -pre-RA-sched=fast %s | FileCheck %s --check-prefix=X64FAST
;
; Test patterns that require preserving and restoring flags.
@@ -63,6 +64,29 @@ define dso_local i32 @test1() nounwind {
; X64-NEXT: .LBB0_2: # %if.end
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
+;
+; X64FAST-LABEL: test1:
+; X64FAST: # %bb.0: # %entry
+; X64FAST-NEXT: movzbl b(%rip), %ecx
+; X64FAST-NEXT: leal 1(%rcx), %eax
+; X64FAST-NEXT: movb %al, b(%rip)
+; X64FAST-NEXT: incl c(%rip)
+; X64FAST-NEXT: movzbl a(%rip), %edx
+; X64FAST-NEXT: sete %sil
+; X64FAST-NEXT: leal 1(%rdx), %edi
+; X64FAST-NEXT: cmpb %cl, %dl
+; X64FAST-NEXT: sete d(%rip)
+; X64FAST-NEXT: movb %dil, a(%rip)
+; X64FAST-NEXT: testb %sil, %sil
+; X64FAST-NEXT: jne .LBB0_2
+; X64FAST-NEXT: # %bb.1: # %if.then
+; X64FAST-NEXT: pushq %rax
+; X64FAST-NEXT: movsbl %al, %edi
+; X64FAST-NEXT: callq external
+; X64FAST-NEXT: addq $8, %rsp
+; X64FAST-NEXT: .LBB0_2: # %if.end
+; X64FAST-NEXT: xorl %eax, %eax
+; X64FAST-NEXT: retq
entry:
%bval = load i8, ptr @b
%inc = add i8 %bval, 1
@@ -127,6 +151,24 @@ define dso_local i32 @test2(ptr %ptr) nounwind {
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
+;
+; X64FAST-LABEL: test2:
+; X64FAST: # %bb.0: # %entry
+; X64FAST-NEXT: pushq %rbx
+; X64FAST-NEXT: incl (%rdi)
+; X64FAST-NEXT: setne %bl
+; X64FAST-NEXT: movl $42, %edi
+; X64FAST-NEXT: callq external
+; X64FAST-NEXT: testb %bl, %bl
+; X64FAST-NEXT: jne .LBB1_2
+; X64FAST-NEXT: # %bb.1: # %then
+; X64FAST-NEXT: movl $64, %eax
+; X64FAST-NEXT: popq %rbx
+; X64FAST-NEXT: retq
+; X64FAST-NEXT: .LBB1_2: # %else
+; X64FAST-NEXT: xorl %eax, %eax
+; X64FAST-NEXT: popq %rbx
+; X64FAST-NEXT: retq
entry:
%val = load i32, ptr %ptr
%inc = add i32 %val, 1
@@ -173,6 +215,17 @@ define dso_local void @test_tail_call(ptr %ptr) nounwind optsize {
; X64-NEXT: jne external_b # TAILCALL
; X64-NEXT: # %bb.1: # %then
; X64-NEXT: jmp external_a # TAILCALL
+;
+; X64FAST-LABEL: test_tail_call:
+; X64FAST: # %bb.0: # %entry
+; X64FAST-NEXT: incl (%rdi)
+; X64FAST-NEXT: setne %al
+; X64FAST-NEXT: incb a(%rip)
+; X64FAST-NEXT: sete d(%rip)
+; X64FAST-NEXT: testb %al, %al
+; X64FAST-NEXT: jne external_b # TAILCALL
+; X64FAST-NEXT: # %bb.1: # %then
+; X64FAST-NEXT: jmp external_a # TAILCALL
entry:
%val = load i32, ptr %ptr
%inc = add i32 %val, 1
@@ -267,6 +320,29 @@ define dso_local void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, ptr %pt
; X64-NEXT: cltd
; X64-NEXT: idivl %r11d
; X64-NEXT: jmp .LBB3_1
+;
+; X64FAST-LABEL: PR37100:
+; X64FAST: # %bb.0: # %bb
+; X64FAST-NEXT: movq %rdx, %r10
+; X64FAST-NEXT: movl {{[0-9]+}}(%rsp), %esi
+; X64FAST-NEXT: movzbl %cl, %ecx
+; X64FAST-NEXT: .p2align 4, 0x90
+; X64FAST-NEXT: .LBB3_1: # %bb1
+; X64FAST-NEXT: # =>This Inner Loop Header: Depth=1
+; X64FAST-NEXT: movsbq %dil, %rax
+; X64FAST-NEXT: xorl %r11d, %r11d
+; X64FAST-NEXT: cmpq %rax, %r10
+; X64FAST-NEXT: setl %r11b
+; X64FAST-NEXT: negl %r11d
+; X64FAST-NEXT: cmpq %rax, %r10
+; X64FAST-NEXT: movzbl %al, %edi
+; X64FAST-NEXT: cmovgel %ecx, %edi
+; X64FAST-NEXT: movb %dil, (%r8)
+; X64FAST-NEXT: cmovgel (%r9), %r11d
+; X64FAST-NEXT: movl %esi, %eax
+; X64FAST-NEXT: cltd
+; X64FAST-NEXT: idivl %r11d
+; X64FAST-NEXT: jmp .LBB3_1
bb:
br label %bb1
@@ -332,6 +408,20 @@ define dso_local void @PR37431(ptr %arg1, ptr %arg2, ptr %arg3, i32 %arg4, i64 %
; X64-NEXT: idivl %edi
; X64-NEXT: movb %dl, (%rcx)
; X64-NEXT: retq
+;
+; X64FAST-LABEL: PR37431:
+; X64FAST: # %bb.0: # %entry
+; X64FAST-NEXT: movl %ecx, %eax
+; X64FAST-NEXT: movq %rdx, %rcx
+; X64FAST-NEXT: xorl %r9d, %r9d
+; X64FAST-NEXT: movslq (%rdi), %rdx
+; X64FAST-NEXT: cmpq %rdx, %r8
+; X64FAST-NEXT: sbbl %r9d, %r9d
+; X64FAST-NEXT: cltd
+; X64FAST-NEXT: idivl %r9d
+; X64FAST-NEXT: movb %r9b, (%rsi)
+; X64FAST-NEXT: movb %dl, (%rcx)
+; X64FAST-NEXT: retq
entry:
%tmp = load i32, ptr %arg1
%tmp1 = sext i32 %tmp to i64
More information about the llvm-commits
mailing list