[llvm] 10e7899 - [FPEnv] Get rid of extra moves in fpenv calls
Serge Pavlov via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 6 00:56:17 PDT 2023
Author: Serge Pavlov
Date: 2023-06-06T14:54:52+07:00
New Revision: 10e7899818803029d2f91a20c67f5a1d75781b8a
URL: https://github.com/llvm/llvm-project/commit/10e7899818803029d2f91a20c67f5a1d75781b8a
DIFF: https://github.com/llvm/llvm-project/commit/10e7899818803029d2f91a20c67f5a1d75781b8a.diff
LOG: [FPEnv] Get rid of extra moves in fpenv calls
If intrinsic `get_fpenv` or `set_fpenv` is lowered to the form where FP
environment is represented as a region in memory, extra moves can
appear. For example the code:
define void @func_01(ptr %ptr) {
%env = call i256 @llvm.get.fpenv.i256()
store i256 %env, ptr %ptr
ret void
}
produces DAG:
ch = get_fpenv_mem ch, memory_region
val: i256, ch = load ch, memory_region
ch = store ch, ptr, val
In this case the extra moves can be avoided if `get_fpenv_mem` got
pointer to the memory where the FP environment should be finally placed.
This change implements such an optimization for this use case.
Differential Revision: https://reviews.llvm.org/D150437
Added:
llvm/test/CodeGen/X86/fpenv-combine.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/fpenv.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 54a25c4b19513..02da5508656e6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -539,6 +539,8 @@ namespace {
SDValue visitFP_TO_BF16(SDNode *N);
SDValue visitVECREDUCE(SDNode *N);
SDValue visitVPOp(SDNode *N);
+ SDValue visitGET_FPENV_MEM(SDNode *N);
+ SDValue visitSET_FPENV_MEM(SDNode *N);
template <class MatchContextClass>
SDValue visitFADDForFMACombine(SDNode *N);
@@ -2001,6 +2003,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
case ISD::FREEZE: return visitFREEZE(N);
+ case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
+ case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -25714,6 +25718,97 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
+
+ // Check if the memory, where FP state is written to, is used only in a single
+ // load operation.
+ LoadSDNode *LdNode = nullptr;
+ for (auto *U : Ptr->uses()) {
+ if (U == N)
+ continue;
+ if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
+ if (LdNode && LdNode != Ld)
+ return SDValue();
+ LdNode = Ld;
+ continue;
+ }
+ return SDValue();
+ }
+ if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
+ !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
+ !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
+ return SDValue();
+
+ // Check if the loaded value is used only in a store operation.
+ StoreSDNode *StNode = nullptr;
+ for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
+ SDUse &U = I.getUse();
+ if (U.getResNo() == 0) {
+ if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
+ if (StNode)
+ return SDValue();
+ StNode = St;
+ } else {
+ return SDValue();
+ }
+ }
+ }
+ if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
+ !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
+ !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
+ return SDValue();
+
+ // Create new node GET_FPENV_MEM, which uses the store address to write FP
+ // environment.
+ SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
+ StNode->getMemOperand());
+ CombineTo(StNode, Res, false);
+ return Res;
+}
+
+SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
+
+ // Check if the address of the FP state is used only in a single store
+ // operation.
+ StoreSDNode *StNode = nullptr;
+ for (auto *U : Ptr->uses()) {
+ if (U == N)
+ continue;
+ if (auto *St = dyn_cast<StoreSDNode>(U)) {
+ if (StNode && StNode != St)
+ return SDValue();
+ StNode = St;
+ continue;
+ }
+ return SDValue();
+ }
+ if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
+ !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
+ !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
+ return SDValue();
+
+ // Check if the stored value is loaded from some location and the loaded
+ // value is used only in the store operation.
+ SDValue StValue = StNode->getValue();
+ auto *LdNode = dyn_cast<LoadSDNode>(StValue);
+ if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
+ !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
+ !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
+ return SDValue();
+
+ // Create new node SET_FPENV_MEM, which uses the load address to read FP
+ // environment.
+ SDValue Res =
+ DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
+ LdNode->getMemOperand());
+ return Res;
+}
+
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
diff --git a/llvm/test/CodeGen/X86/fpenv-combine.ll b/llvm/test/CodeGen/X86/fpenv-combine.ll
new file mode 100644
index 0000000000000..568369a061ceb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fpenv-combine.ll
@@ -0,0 +1,200 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s -check-prefix=X64
+
+declare i256 @llvm.get.fpenv.i256()
+declare void @llvm.set.fpenv.i256(i256 %fpenv)
+declare void @llvm.reset.fpenv()
+
+; Cannot fold get_fpenv+load+store because loaded value is used in
+; more than one instruction.
+define void @get_fpenv_02(ptr %ptr1, ptr %ptr2) #0 {
+; X64-LABEL: get_fpenv_02:
+; X64: # %bb.0:
+; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %rbx
+; X64-NEXT: subq $40, %rsp
+; X64-NEXT: movq %rsi, %rbx
+; X64-NEXT: movq %rdi, %r14
+; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT: callq fegetenv@PLT
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT: movq %rsi, 24(%r14)
+; X64-NEXT: movq %rcx, (%r14)
+; X64-NEXT: movq %rdx, 8(%r14)
+; X64-NEXT: movq %rax, 16(%r14)
+; X64-NEXT: movq %rax, 16(%rbx)
+; X64-NEXT: movq %rsi, 24(%rbx)
+; X64-NEXT: movq %rcx, (%rbx)
+; X64-NEXT: movq %rdx, 8(%rbx)
+; X64-NEXT: addq $40, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %r14
+; X64-NEXT: retq
+ %fpenv = call i256 @llvm.get.fpenv.i256()
+ store i256 %fpenv, ptr %ptr1
+ store i256 %fpenv, ptr %ptr2
+ ret void
+}
+
+; Cannot fold get_fpenv+load+store because load and store have
+; different type.
+define void @get_fpenv_03(ptr %ptr) #0 {
+; X64-LABEL: get_fpenv_03:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: subq $32, %rsp
+; X64-NEXT: movq %rdi, %rbx
+; X64-NEXT: movq %rsp, %rdi
+; X64-NEXT: callq fegetenv@PLT
+; X64-NEXT: movl (%rsp), %eax
+; X64-NEXT: movl %eax, (%rbx)
+; X64-NEXT: addq $32, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
+ %fpenv = call i256 @llvm.get.fpenv.i256()
+ %part = trunc i256 %fpenv to i32
+ store i32 %part, ptr %ptr
+ ret void
+}
+
+; Cannot fold get_fpenv+load+store because loaded value is not
+; immediately stored.
+define void @get_fpenv_04(ptr %ptr) #0 {
+; X64-LABEL: get_fpenv_04:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: subq $32, %rsp
+; X64-NEXT: movq %rdi, %rbx
+; X64-NEXT: movq %rsp, %rdi
+; X64-NEXT: callq fegetenv@PLT
+; X64-NEXT: movq (%rsp), %rax
+; X64-NEXT: andl $1, %eax
+; X64-NEXT: movq %rax, (%rbx)
+; X64-NEXT: movq $0, 16(%rbx)
+; X64-NEXT: movq $0, 24(%rbx)
+; X64-NEXT: movq $0, 8(%rbx)
+; X64-NEXT: addq $32, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
+ %fpenv = call i256 @llvm.get.fpenv.i256()
+ %masked = and i256 %fpenv, 1
+ store i256 %masked, ptr %ptr
+ ret void
+}
+
+; Cannot fold get_fpenv+load+store because there is a memory operation
+; between load and store.
+define void @get_fpenv_05(ptr %ptr1, ptr %ptr2) #0 {
+; X64-LABEL: get_fpenv_05:
+; X64: # %bb.0:
+; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %rbx
+; X64-NEXT: subq $40, %rsp
+; X64-NEXT: movq %rsi, %rbx
+; X64-NEXT: movq %rdi, %r14
+; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT: callq fegetenv@PLT
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT: movl $0, (%r14)
+; X64-NEXT: movq %rsi, 24(%rbx)
+; X64-NEXT: movq %rdx, 16(%rbx)
+; X64-NEXT: movq %rcx, 8(%rbx)
+; X64-NEXT: movq %rax, (%rbx)
+; X64-NEXT: addq $40, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %r14
+; X64-NEXT: retq
+ %fpenv = call i256 @llvm.get.fpenv.i256()
+ store i32 0, ptr %ptr1
+ store i256 %fpenv, ptr %ptr2
+ ret void
+}
+
+; Cannot fold load+save+set_fpenv because there is a memory operation
+; between load and store.
+define void @set_fpenv_02(ptr %ptr1, ptr %ptr2) #0 {
+; X64-LABEL: set_fpenv_02:
+; X64: # %bb.0:
+; X64-NEXT: subq $40, %rsp
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: movq 8(%rdi), %rcx
+; X64-NEXT: movq 16(%rdi), %rdx
+; X64-NEXT: movq 24(%rdi), %rdi
+; X64-NEXT: movl $0, (%rsi)
+; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp)
+; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
+; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
+; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT: callq fesetenv@PLT
+; X64-NEXT: addq $40, %rsp
+; X64-NEXT: retq
+ %fpenv = load i256, ptr %ptr1
+ store i32 0, ptr %ptr2
+ call void @llvm.set.fpenv.i256(i256 %fpenv)
+ ret void
+}
+
+; Cannot fold load+save+set_fpenv because loaded value is used in
+; more than one store.
+define void @set_fpenv_03(ptr %ptr1, ptr %ptr2) #0 {
+; X64-LABEL: set_fpenv_03:
+; X64: # %bb.0:
+; X64-NEXT: pushq %r15
+; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %r13
+; X64-NEXT: pushq %r12
+; X64-NEXT: pushq %rbx
+; X64-NEXT: subq $32, %rsp
+; X64-NEXT: movq %rsi, %rbx
+; X64-NEXT: movq (%rdi), %r14
+; X64-NEXT: movq 8(%rdi), %r15
+; X64-NEXT: movq 16(%rdi), %r12
+; X64-NEXT: movq 24(%rdi), %r13
+; X64-NEXT: callq fesetenv@PLT
+; X64-NEXT: movq %r13, 24(%rbx)
+; X64-NEXT: movq %r12, 16(%rbx)
+; X64-NEXT: movq %r15, 8(%rbx)
+; X64-NEXT: movq %r14, (%rbx)
+; X64-NEXT: addq $32, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %r12
+; X64-NEXT: popq %r13
+; X64-NEXT: popq %r14
+; X64-NEXT: popq %r15
+; X64-NEXT: retq
+ %fpenv = load i256, ptr %ptr1
+ call void @llvm.set.fpenv.i256(i256 %fpenv)
+ store i256 %fpenv, ptr %ptr2
+ ret void
+}
+
+; Cannot fold load+save+set_fpenv because loaded value is not
+; immediately stored.
+define void @set_fpenv_04(ptr %ptr) #0 {
+; X64-LABEL: set_fpenv_04:
+; X64: # %bb.0:
+; X64-NEXT: subq $40, %rsp
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: andl $1, %eax
+; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT: movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT: callq fesetenv@PLT
+; X64-NEXT: addq $40, %rsp
+; X64-NEXT: retq
+ %fpenv = load i256, ptr %ptr
+ %masked = and i256 %fpenv, 1
+ call void @llvm.set.fpenv.i256(i256 %masked)
+ ret void
+}
+
+
+attributes #0 = { nounwind "use-soft-float"="true" }
diff --git a/llvm/test/CodeGen/X86/fpenv.ll b/llvm/test/CodeGen/X86/fpenv.ll
index 79e4ffeb9f3e1..982a36fc1a5bf 100644
--- a/llvm/test/CodeGen/X86/fpenv.ll
+++ b/llvm/test/CodeGen/X86/fpenv.ll
@@ -249,97 +249,27 @@ define void @func_05(i32 %x) nounwind {
define void @get_fpenv_01(ptr %ptr) #0 {
; X86-NOSSE-LABEL: get_fpenv_01:
; X86-NOSSE: # %bb.0: # %entry
-; X86-NOSSE-NEXT: pushl %ebp
-; X86-NOSSE-NEXT: pushl %ebx
-; X86-NOSSE-NEXT: pushl %edi
-; X86-NOSSE-NEXT: pushl %esi
-; X86-NOSSE-NEXT: subl $60, %esp
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: subl $44, %esp
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: calll fegetenv
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT: movl %ecx, 24(%esi)
-; X86-NOSSE-NEXT: movl %eax, 28(%esi)
-; X86-NOSSE-NEXT: movl %ebp, 16(%esi)
-; X86-NOSSE-NEXT: movl %ebx, 20(%esi)
-; X86-NOSSE-NEXT: movl %edi, 8(%esi)
-; X86-NOSSE-NEXT: movl %edx, 12(%esi)
-; X86-NOSSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NOSSE-NEXT: movl %eax, (%esi)
-; X86-NOSSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NOSSE-NEXT: movl %eax, 4(%esi)
-; X86-NOSSE-NEXT: addl $60, %esp
-; X86-NOSSE-NEXT: popl %esi
-; X86-NOSSE-NEXT: popl %edi
-; X86-NOSSE-NEXT: popl %ebx
-; X86-NOSSE-NEXT: popl %ebp
+; X86-NOSSE-NEXT: addl $44, %esp
; X86-NOSSE-NEXT: retl
;
; X86-SSE-LABEL: get_fpenv_01:
; X86-SSE: # %bb.0: # %entry
-; X86-SSE-NEXT: pushl %ebp
-; X86-SSE-NEXT: pushl %ebx
-; X86-SSE-NEXT: pushl %edi
-; X86-SSE-NEXT: pushl %esi
-; X86-SSE-NEXT: subl $60, %esp
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT: subl $44, %esp
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll fegetenv
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE-NEXT: movl %ecx, 24(%esi)
-; X86-SSE-NEXT: movl %eax, 28(%esi)
-; X86-SSE-NEXT: movl %ebp, 16(%esi)
-; X86-SSE-NEXT: movl %ebx, 20(%esi)
-; X86-SSE-NEXT: movl %edi, 8(%esi)
-; X86-SSE-NEXT: movl %edx, 12(%esi)
-; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SSE-NEXT: movl %eax, (%esi)
-; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SSE-NEXT: movl %eax, 4(%esi)
-; X86-SSE-NEXT: addl $60, %esp
-; X86-SSE-NEXT: popl %esi
-; X86-SSE-NEXT: popl %edi
-; X86-SSE-NEXT: popl %ebx
-; X86-SSE-NEXT: popl %ebp
+; X86-SSE-NEXT: addl $44, %esp
; X86-SSE-NEXT: retl
;
; X64-LABEL: get_fpenv_01:
; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rbx
-; X64-NEXT: subq $32, %rsp
-; X64-NEXT: movq %rdi, %rbx
-; X64-NEXT: movq %rsp, %rdi
+; X64-NEXT: subq $40, %rsp
; X64-NEXT: callq fegetenv@PLT
-; X64-NEXT: movq (%rsp), %rax
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi
-; X64-NEXT: movq %rsi, 16(%rbx)
-; X64-NEXT: movq %rdx, 24(%rbx)
-; X64-NEXT: movq %rax, (%rbx)
-; X64-NEXT: movq %rcx, 8(%rbx)
-; X64-NEXT: addq $32, %rsp
-; X64-NEXT: popq %rbx
+; X64-NEXT: addq $40, %rsp
; X64-NEXT: retq
entry:
%env = call i256 @llvm.get.fpenv.i256()
@@ -350,88 +280,25 @@ entry:
define void @set_fpenv_01(ptr %ptr) #0 {
; X86-NOSSE-LABEL: set_fpenv_01:
; X86-NOSSE: # %bb.0: # %entry
-; X86-NOSSE-NEXT: pushl %ebp
-; X86-NOSSE-NEXT: pushl %ebx
-; X86-NOSSE-NEXT: pushl %edi
-; X86-NOSSE-NEXT: pushl %esi
; X86-NOSSE-NEXT: subl $44, %esp
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT: movl (%eax), %ecx
-; X86-NOSSE-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOSSE-NEXT: movl 4(%eax), %edx
-; X86-NOSSE-NEXT: movl 12(%eax), %esi
-; X86-NOSSE-NEXT: movl 8(%eax), %edi
-; X86-NOSSE-NEXT: movl 20(%eax), %ebx
-; X86-NOSSE-NEXT: movl 16(%eax), %ebp
-; X86-NOSSE-NEXT: movl 28(%eax), %ecx
-; X86-NOSSE-NEXT: movl 24(%eax), %eax
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %ebp, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %ebx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NOSSE-NEXT: movl %eax, (%esp)
; X86-NOSSE-NEXT: calll fesetenv
; X86-NOSSE-NEXT: addl $44, %esp
-; X86-NOSSE-NEXT: popl %esi
-; X86-NOSSE-NEXT: popl %edi
-; X86-NOSSE-NEXT: popl %ebx
-; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl
;
; X86-SSE-LABEL: set_fpenv_01:
; X86-SSE: # %bb.0: # %entry
-; X86-SSE-NEXT: pushl %ebp
-; X86-SSE-NEXT: pushl %ebx
-; X86-SSE-NEXT: pushl %edi
-; X86-SSE-NEXT: pushl %esi
; X86-SSE-NEXT: subl $44, %esp
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: movl (%eax), %ecx
-; X86-SSE-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SSE-NEXT: movl 4(%eax), %edx
-; X86-SSE-NEXT: movl 12(%eax), %esi
-; X86-SSE-NEXT: movl 8(%eax), %edi
-; X86-SSE-NEXT: movl 20(%eax), %ebx
-; X86-SSE-NEXT: movl 16(%eax), %ebp
-; X86-SSE-NEXT: movl 28(%eax), %ecx
-; X86-SSE-NEXT: movl 24(%eax), %eax
-; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movl %ebp, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movl %ebx, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movl %eax, (%esp)
; X86-SSE-NEXT: calll fesetenv
; X86-SSE-NEXT: addl $44, %esp
-; X86-SSE-NEXT: popl %esi
-; X86-SSE-NEXT: popl %edi
-; X86-SSE-NEXT: popl %ebx
-; X86-SSE-NEXT: popl %ebp
; X86-SSE-NEXT: retl
;
; X64-LABEL: set_fpenv_01:
; X64: # %bb.0: # %entry
; X64-NEXT: subq $40, %rsp
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: movq 24(%rdi), %rdx
-; X64-NEXT: movq 16(%rdi), %rsi
-; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
-; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
-; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp)
-; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp)
-; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; X64-NEXT: callq fesetenv@PLT
; X64-NEXT: addq $40, %rsp
; X64-NEXT: retq
More information about the llvm-commits
mailing list