[llvm] [LLVM] Slay undead copysign code (PR #111269)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 31 17:37:05 PDT 2024
https://github.com/workingjubilee updated https://github.com/llvm/llvm-project/pull/111269
From 7745181604091de4895dad4dfa6c1ecdba7917cb Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee at gmail.com>
Date: Thu, 31 Oct 2024 17:32:24 -0700
Subject: [PATCH 1/5] [PowerPC] regenerate ctrloop-cpsgn.ll test
---
llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll | 80 +++++++++++++++++++++-
1 file changed, 79 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
index a114438a87476e..1d1612928dbc2a 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
@@ -1,9 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs < %s -mcpu=ppc | FileCheck %s
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
target triple = "powerpc-unknown-linux-gnu"
define ppc_fp128 @foo(ptr nocapture %n, ppc_fp128 %d) nounwind readonly {
+; CHECK-LABEL: foo:
+; CHECK-NOT: mtctr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: stwu 1, -176(1)
+; CHECK-NEXT: stw 0, 180(1)
+; CHECK-NEXT: stfd 2, 128(1)
+; CHECK-NEXT: lwz 3, 132(1)
+; CHECK-NEXT: stfd 1, 136(1)
+; CHECK-NEXT: stw 3, 148(1)
+; CHECK-NEXT: lwz 3, 128(1)
+; CHECK-NEXT: stfd 31, 168(1) # 8-byte Folded Spill
+; CHECK-NEXT: stw 3, 144(1)
+; CHECK-NEXT: lwz 3, 140(1)
+; CHECK-NEXT: lfd 0, 144(1)
+; CHECK-NEXT: stw 3, 156(1)
+; CHECK-NEXT: lwz 3, 136(1)
+; CHECK-NEXT: stw 30, 160(1) # 4-byte Folded Spill
+; CHECK-NEXT: li 30, 2048
+; CHECK-NEXT: stw 3, 152(1)
+; CHECK-NEXT: lfd 31, 152(1)
+; CHECK-NEXT: fmr 1, 31
+; CHECK-NEXT: .LBB0_1: # %for.body
+; CHECK-NEXT: #
+; CHECK-NEXT: stfd 1, 64(1)
+; CHECK-NEXT: lwz 3, 68(1)
+; CHECK-NEXT: stfd 31, 88(1)
+; CHECK-NEXT: stw 3, 84(1)
+; CHECK-NEXT: lwz 3, 64(1)
+; CHECK-NEXT: stfd 0, 56(1)
+; CHECK-NEXT: stw 3, 80(1)
+; CHECK-NEXT: lwz 3, 92(1)
+; CHECK-NEXT: lfd 4, 96(1)
+; CHECK-NEXT: stw 3, 108(1)
+; CHECK-NEXT: lwz 3, 88(1)
+; CHECK-NEXT: lfd 1, 80(1)
+; CHECK-NEXT: stw 3, 104(1)
+; CHECK-NEXT: lwz 3, 60(1)
+; CHECK-NEXT: lfd 3, 104(1)
+; CHECK-NEXT: stw 3, 76(1)
+; CHECK-NEXT: lwz 3, 56(1)
+; CHECK-NEXT: stw 3, 72(1)
+; CHECK-NEXT: lfd 2, 72(1)
+; CHECK-NEXT: bl copysignl
+; CHECK-NEXT: stfd 2, 48(1)
+; CHECK-NEXT: addi 30, 30, -1
+; CHECK-NEXT: lwz 3, 52(1)
+; CHECK-NEXT: cmplwi 30, 0
+; CHECK-NEXT: stfd 1, 40(1)
+; CHECK-NEXT: stw 3, 116(1)
+; CHECK-NEXT: lwz 3, 48(1)
+; CHECK-NEXT: stw 3, 112(1)
+; CHECK-NEXT: lwz 3, 44(1)
+; CHECK-NEXT: lfd 0, 112(1)
+; CHECK-NEXT: stw 3, 124(1)
+; CHECK-NEXT: lwz 3, 40(1)
+; CHECK-NEXT: stw 3, 120(1)
+; CHECK-NEXT: lfd 1, 120(1)
+; CHECK-NEXT: bc 12, 1, .LBB0_1
+; CHECK-NEXT: # %bb.2: # %for.end
+; CHECK-NEXT: stfd 1, 16(1)
+; CHECK-NEXT: lwz 3, 20(1)
+; CHECK-NEXT: stfd 0, 8(1)
+; CHECK-NEXT: stw 3, 36(1)
+; CHECK-NEXT: lwz 3, 16(1)
+; CHECK-NEXT: lfd 31, 168(1) # 8-byte Folded Reload
+; CHECK-NEXT: stw 3, 32(1)
+; CHECK-NEXT: lwz 3, 12(1)
+; CHECK-NEXT: lfd 1, 32(1)
+; CHECK-NEXT: stw 3, 28(1)
+; CHECK-NEXT: lwz 3, 8(1)
+; CHECK-NEXT: lwz 30, 160(1) # 4-byte Folded Reload
+; CHECK-NEXT: stw 3, 24(1)
+; CHECK-NEXT: lfd 2, 24(1)
+; CHECK-NEXT: lwz 0, 180(1)
+; CHECK-NEXT: addi 1, 1, 176
+; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: blr
entry:
br label %for.body
@@ -23,6 +102,5 @@ for.end: ; preds = %for.body
declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) #0
-; CHECK: @foo
; CHECK-NOT: mtctr
From 5aaacd72260f93d053253a1353ea90ca46208a8c Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee at gmail.com>
Date: Thu, 31 Oct 2024 17:32:35 -0700
Subject: [PATCH 2/5] [CodeGen] Demand llvm.copysign.f{16,32,64,80,128} lowers
without libcalls
This makes real what is already true:
Copysign never needs to lower to runtime libcalls!
It should always be possible to implement the operation via bitops.
---
llvm/lib/CodeGen/IntrinsicLowering.cpp | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp
index f799a8cfc1ba7e..0c3cb8ccc124b6 100644
--- a/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -438,7 +438,15 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
}
case Intrinsic::copysign: {
- ReplaceFPIntrinsicWithCall(CI, "copysignf", "copysign", "copysignl");
+ switch (CI->getArgOperand(0)->getType()->getTypeID()) {
+ default:
+ report_fatal_error("copysign intrinsic without arch-specific floats "
+ "reached intrinsic-to-libcall lowering");
+ break;
+ case Type::PPC_FP128TyID:
+ ReplaceCallWith("copysignl", CI, CI->arg_begin(), CI->arg_end(),
+ Type::getFloatTy(CI->getContext()));
+ }
break;
}
case Intrinsic::get_rounding:
From e277a8ed89c642cdb4838c1c73073eccfae4b77e Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee at gmail.com>
Date: Sat, 5 Oct 2024 15:47:58 -0700
Subject: [PATCH 3/5] [SelectionDAG] Only lower COPYSIGN_PPC_F128 to copysignl
This reduces the burden on frontends that wish to support float ops
without needing a C compiler to build LLVM's compiler-rt for that target,
e.g. so that they can be a fully self-contained toolchain for bare-metal.
All other floats are expanded for all current architectures just fine.
PowerPC, however, does not efficiently legalize its very own float.
---
llvm/include/llvm/IR/RuntimeLibcalls.def | 4 ----
llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 12 ++++++------
llvm/lib/IR/RuntimeLibcalls.cpp | 1 -
llvm/lib/Target/SystemZ/ZOSLibcallNames.def | 3 ---
.../WebAssemblyRuntimeLibcallSignatures.cpp | 3 ---
5 files changed, 6 insertions(+), 17 deletions(-)
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 4aab658a86690c..62ee43cfc54ae9 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -289,10 +289,6 @@ HANDLE_LIBCALL(FLOOR_F64, "floor")
HANDLE_LIBCALL(FLOOR_F80, "floorl")
HANDLE_LIBCALL(FLOOR_F128, "floorl")
HANDLE_LIBCALL(FLOOR_PPCF128, "floorl")
-HANDLE_LIBCALL(COPYSIGN_F32, "copysignf")
-HANDLE_LIBCALL(COPYSIGN_F64, "copysign")
-HANDLE_LIBCALL(COPYSIGN_F80, "copysignl")
-HANDLE_LIBCALL(COPYSIGN_F128, "copysignl")
HANDLE_LIBCALL(COPYSIGN_PPCF128, "copysignl")
HANDLE_LIBCALL(FMIN_F32, "fminf")
HANDLE_LIBCALL(FMIN_F64, "fmin")
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index fa2731ff7dbda7..146b5d720437dd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1709,12 +1709,12 @@ void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
- RTLIB::COPYSIGN_F32,
- RTLIB::COPYSIGN_F64,
- RTLIB::COPYSIGN_F80,
- RTLIB::COPYSIGN_F128,
- RTLIB::COPYSIGN_PPCF128), Lo, Hi);
+
+ EVT VT = N->getValueType(0);
+ ExpandFloatRes_Binary(
+ N,
+ (VT == MVT::ppcf128 ? RTLIB::COPYSIGN_PPCF128 : RTLIB::UNKNOWN_LIBCALL),
+ Lo, Hi);
}
void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 06167559a77697..fba997a88279df 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -62,7 +62,6 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
setLibcallName(RTLIB::ROUND_F128, "roundf128");
setLibcallName(RTLIB::ROUNDEVEN_F128, "roundevenf128");
setLibcallName(RTLIB::FLOOR_F128, "floorf128");
- setLibcallName(RTLIB::COPYSIGN_F128, "copysignf128");
setLibcallName(RTLIB::FMIN_F128, "fminf128");
setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
setLibcallName(RTLIB::LROUND_F128, "lroundf128");
diff --git a/llvm/lib/Target/SystemZ/ZOSLibcallNames.def b/llvm/lib/Target/SystemZ/ZOSLibcallNames.def
index 12a01522a7e643..a53c9618696fcc 100644
--- a/llvm/lib/Target/SystemZ/ZOSLibcallNames.def
+++ b/llvm/lib/Target/SystemZ/ZOSLibcallNames.def
@@ -87,9 +87,6 @@ HANDLE_LIBCALL(EXP2_F128, "@@LXP2@B")
HANDLE_LIBCALL(COS_F64, "@@SCOS@B")
HANDLE_LIBCALL(COS_F32, "@@FCOS@B")
HANDLE_LIBCALL(COS_F128, "@@LCOS@B")
-HANDLE_LIBCALL(COPYSIGN_F64, "@@DCPY@B")
-HANDLE_LIBCALL(COPYSIGN_F32, "@@FCPY@B")
-HANDLE_LIBCALL(COPYSIGN_F128, "@@LCPY@B")
HANDLE_LIBCALL(CEIL_F64, "@@SCEL@B")
HANDLE_LIBCALL(CEIL_F32, "@@FCEL@B")
HANDLE_LIBCALL(CEIL_F128, "@@LCEL@B")
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index aaa52256707210..9a8d73bee1c0ba 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -264,9 +264,6 @@ struct RuntimeLibcallSignatureTable {
Table[RTLIB::FLOOR_F32] = f32_func_f32;
Table[RTLIB::FLOOR_F64] = f64_func_f64;
Table[RTLIB::FLOOR_F128] = i64_i64_func_i64_i64;
- Table[RTLIB::COPYSIGN_F32] = f32_func_f32_f32;
- Table[RTLIB::COPYSIGN_F64] = f64_func_f64_f64;
- Table[RTLIB::COPYSIGN_F128] = i64_i64_func_i64_i64_i64_i64;
Table[RTLIB::FMIN_F32] = f32_func_f32_f32;
Table[RTLIB::FMIN_F64] = f64_func_f64_f64;
Table[RTLIB::FMIN_F128] = i64_i64_func_i64_i64_i64_i64;
From 5fd0d86737fa7b0a97c6182d323291b558ec8fe9 Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee at gmail.com>
Date: Sat, 12 Oct 2024 01:31:45 -0700
Subject: [PATCH 4/5] [PowerPC][SelectionDAG] Expand `@llvm.copysign.ppc_fp128`
without copysignl
This allows ripping out the remaining copysignl infra.
---
llvm/include/llvm/IR/RuntimeLibcalls.def | 1 -
llvm/lib/CodeGen/IntrinsicLowering.cpp | 12 --
.../SelectionDAG/LegalizeFloatTypes.cpp | 17 ++-
llvm/test/CodeGen/PowerPC/copysignl.ll | 105 ++++++-------
llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll | 110 +++++++-------
.../PowerPC/fp128-bitcast-after-operation.ll | 138 +++++++-----------
6 files changed, 161 insertions(+), 222 deletions(-)
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 62ee43cfc54ae9..4f986d3c7450bf 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -289,7 +289,6 @@ HANDLE_LIBCALL(FLOOR_F64, "floor")
HANDLE_LIBCALL(FLOOR_F80, "floorl")
HANDLE_LIBCALL(FLOOR_F128, "floorl")
HANDLE_LIBCALL(FLOOR_PPCF128, "floorl")
-HANDLE_LIBCALL(COPYSIGN_PPCF128, "copysignl")
HANDLE_LIBCALL(FMIN_F32, "fminf")
HANDLE_LIBCALL(FMIN_F64, "fmin")
HANDLE_LIBCALL(FMIN_F80, "fminl")
diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 0c3cb8ccc124b6..fda30c65292b2f 100644
--- a/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -437,18 +437,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
ReplaceFPIntrinsicWithCall(CI, "roundevenf", "roundeven", "roundevenl");
break;
}
- case Intrinsic::copysign: {
- switch (CI->getArgOperand(0)->getType()->getTypeID()) {
- default:
- report_fatal_error("copysign intrinsic without arch-specific floats "
- "reached intrinsic-to-libcall lowering");
- break;
- case Type::PPC_FP128TyID:
- ReplaceCallWith("copysignl", CI, CI->arg_begin(), CI->arg_end(),
- Type::getFloatTy(CI->getContext()));
- }
- break;
- }
case Intrinsic::get_rounding:
// Lower to "round to the nearest"
if (!CI->getType()->isVoidTy())
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 146b5d720437dd..e30d9b1a0f31fa 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1710,11 +1710,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- EVT VT = N->getValueType(0);
- ExpandFloatRes_Binary(
- N,
- (VT == MVT::ppcf128 ? RTLIB::COPYSIGN_PPCF128 : RTLIB::UNKNOWN_LIBCALL),
- Lo, Hi);
+ assert(N->getValueType(0) == MVT::ppcf128 &&
+ "Logic only correct for ppcf128!");
+ SDLoc DL = SDLoc(N);
+ SDValue Tmp = SDValue();
+ GetExpandedFloat(N->getOperand(0), Lo, Tmp);
+
+ Hi = DAG.getNode(ISD::FCOPYSIGN, DL, Tmp.getValueType(), Tmp,
+ N->getOperand(1));
+ // A double-double is Hi + Lo, so if Hi flips sign, so must Lo
+ Lo = DAG.getSelectCC(DL, Tmp, Hi, Lo,
+ DAG.getNode(ISD::FNEG, DL, Lo.getValueType(), Lo),
+ ISD::SETEQ);
}
void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
diff --git a/llvm/test/CodeGen/PowerPC/copysignl.ll b/llvm/test/CodeGen/PowerPC/copysignl.ll
index 40ed3d803094f4..3b865a083dca5e 100644
--- a/llvm/test/CodeGen/PowerPC/copysignl.ll
+++ b/llvm/test/CodeGen/PowerPC/copysignl.ll
@@ -43,31 +43,26 @@ declare double @copysign(double, double) #0
define ppc_fp128 @foo_ll(double %a, ppc_fp128 %b) #0 {
; CHECK-LABEL: foo_ll:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mflr 0
-; CHECK-NEXT: stdu 1, -112(1)
-; CHECK-NEXT: fmr 3, 2
-; CHECK-NEXT: addis 3, 2, .LCPI2_0@toc@ha
-; CHECK-NEXT: std 0, 128(1)
-; CHECK-NEXT: lfs 2, .LCPI2_0@toc@l(3)
-; CHECK-NEXT: bl copysignl
-; CHECK-NEXT: nop
-; CHECK-NEXT: addi 1, 1, 112
-; CHECK-NEXT: ld 0, 16(1)
-; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: fcpsgn 0, 2, 1
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: li 4, 8
+; CHECK-NEXT: fcmpu 0, 1, 0
+; CHECK-NEXT: fmr 1, 0
+; CHECK-NEXT: iseleq 3, 4, 3
+; CHECK-NEXT: addis 4, 2, .LCPI2_0@toc@ha
+; CHECK-NEXT: addi 4, 4, .LCPI2_0@toc@l
+; CHECK-NEXT: lfdx 2, 4, 3
; CHECK-NEXT: blr
;
; CHECK-VSX-LABEL: foo_ll:
; CHECK-VSX: # %bb.0: # %entry
-; CHECK-VSX-NEXT: mflr 0
-; CHECK-VSX-NEXT: stdu 1, -112(1)
-; CHECK-VSX-NEXT: fmr 3, 2
+; CHECK-VSX-NEXT: fmr 0, 1
+; CHECK-VSX-NEXT: xscpsgndp 1, 2, 1
; CHECK-VSX-NEXT: xxlxor 2, 2, 2
-; CHECK-VSX-NEXT: std 0, 128(1)
-; CHECK-VSX-NEXT: bl copysignl
-; CHECK-VSX-NEXT: nop
-; CHECK-VSX-NEXT: addi 1, 1, 112
-; CHECK-VSX-NEXT: ld 0, 16(1)
-; CHECK-VSX-NEXT: mtlr 0
+; CHECK-VSX-NEXT: xscmpudp 0, 0, 1
+; CHECK-VSX-NEXT: beqlr 0
+; CHECK-VSX-NEXT: # %bb.1: # %entry
+; CHECK-VSX-NEXT: xsnegdp 2, 2
; CHECK-VSX-NEXT: blr
entry:
%conv = fpext double %a to ppc_fp128
@@ -78,31 +73,26 @@ entry:
define ppc_fp128 @foo_ld(double %a, double %b) #0 {
; CHECK-LABEL: foo_ld:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mflr 0
-; CHECK-NEXT: stdu 1, -112(1)
-; CHECK-NEXT: fmr 3, 2
-; CHECK-NEXT: addis 3, 2, .LCPI3_0@toc@ha
-; CHECK-NEXT: std 0, 128(1)
-; CHECK-NEXT: lfs 2, .LCPI3_0@toc@l(3)
-; CHECK-NEXT: bl copysignl
-; CHECK-NEXT: nop
-; CHECK-NEXT: addi 1, 1, 112
-; CHECK-NEXT: ld 0, 16(1)
-; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: fcpsgn 0, 2, 1
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: li 4, 8
+; CHECK-NEXT: fcmpu 0, 1, 0
+; CHECK-NEXT: fmr 1, 0
+; CHECK-NEXT: iseleq 3, 4, 3
+; CHECK-NEXT: addis 4, 2, .LCPI3_0@toc@ha
+; CHECK-NEXT: addi 4, 4, .LCPI3_0@toc@l
+; CHECK-NEXT: lfdx 2, 4, 3
; CHECK-NEXT: blr
;
; CHECK-VSX-LABEL: foo_ld:
; CHECK-VSX: # %bb.0: # %entry
-; CHECK-VSX-NEXT: mflr 0
-; CHECK-VSX-NEXT: stdu 1, -112(1)
-; CHECK-VSX-NEXT: fmr 3, 2
+; CHECK-VSX-NEXT: fmr 0, 1
+; CHECK-VSX-NEXT: xscpsgndp 1, 2, 1
; CHECK-VSX-NEXT: xxlxor 2, 2, 2
-; CHECK-VSX-NEXT: std 0, 128(1)
-; CHECK-VSX-NEXT: bl copysignl
-; CHECK-VSX-NEXT: nop
-; CHECK-VSX-NEXT: addi 1, 1, 112
-; CHECK-VSX-NEXT: ld 0, 16(1)
-; CHECK-VSX-NEXT: mtlr 0
+; CHECK-VSX-NEXT: xscmpudp 0, 0, 1
+; CHECK-VSX-NEXT: beqlr 0
+; CHECK-VSX-NEXT: # %bb.1: # %entry
+; CHECK-VSX-NEXT: xsnegdp 2, 2
; CHECK-VSX-NEXT: blr
entry:
%conv = fpext double %a to ppc_fp128
@@ -114,31 +104,26 @@ entry:
define ppc_fp128 @foo_lf(double %a, float %b) #0 {
; CHECK-LABEL: foo_lf:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mflr 0
-; CHECK-NEXT: stdu 1, -112(1)
-; CHECK-NEXT: fmr 3, 2
-; CHECK-NEXT: addis 3, 2, .LCPI4_0@toc@ha
-; CHECK-NEXT: std 0, 128(1)
-; CHECK-NEXT: lfs 2, .LCPI4_0@toc@l(3)
-; CHECK-NEXT: bl copysignl
-; CHECK-NEXT: nop
-; CHECK-NEXT: addi 1, 1, 112
-; CHECK-NEXT: ld 0, 16(1)
-; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: fcpsgn 0, 2, 1
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: li 4, 8
+; CHECK-NEXT: fcmpu 0, 1, 0
+; CHECK-NEXT: fmr 1, 0
+; CHECK-NEXT: iseleq 3, 4, 3
+; CHECK-NEXT: addis 4, 2, .LCPI4_0@toc@ha
+; CHECK-NEXT: addi 4, 4, .LCPI4_0@toc@l
+; CHECK-NEXT: lfdx 2, 4, 3
; CHECK-NEXT: blr
;
; CHECK-VSX-LABEL: foo_lf:
; CHECK-VSX: # %bb.0: # %entry
-; CHECK-VSX-NEXT: mflr 0
-; CHECK-VSX-NEXT: stdu 1, -112(1)
-; CHECK-VSX-NEXT: fmr 3, 2
+; CHECK-VSX-NEXT: fmr 0, 1
+; CHECK-VSX-NEXT: fcpsgn 1, 2, 1
; CHECK-VSX-NEXT: xxlxor 2, 2, 2
-; CHECK-VSX-NEXT: std 0, 128(1)
-; CHECK-VSX-NEXT: bl copysignl
-; CHECK-VSX-NEXT: nop
-; CHECK-VSX-NEXT: addi 1, 1, 112
-; CHECK-VSX-NEXT: ld 0, 16(1)
-; CHECK-VSX-NEXT: mtlr 0
+; CHECK-VSX-NEXT: xscmpudp 0, 0, 1
+; CHECK-VSX-NEXT: beqlr 0
+; CHECK-VSX-NEXT: # %bb.1: # %entry
+; CHECK-VSX-NEXT: xsnegdp 2, 2
; CHECK-VSX-NEXT: blr
entry:
%conv = fpext double %a to ppc_fp128
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
index 1d1612928dbc2a..ff8311c131f76a 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
@@ -4,84 +4,81 @@
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
target triple = "powerpc-unknown-linux-gnu"
+; Previously this test checked that a CTR-based loop was not formed around a libm call to copysignl,
+; but now that LLVM no longer emits copysignl in most cases, it stands as a tombstone.
+; The loop has mtctr right in the middle, but we don't care because copysignl is nowhere to be found.
+
define ppc_fp128 @foo(ptr nocapture %n, ppc_fp128 %d) nounwind readonly {
; CHECK-LABEL: foo:
; CHECK-NOT: mtctr
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: mflr 0
-; CHECK-NEXT: stwu 1, -176(1)
-; CHECK-NEXT: stw 0, 180(1)
-; CHECK-NEXT: stfd 2, 128(1)
-; CHECK-NEXT: lwz 3, 132(1)
-; CHECK-NEXT: stfd 1, 136(1)
-; CHECK-NEXT: stw 3, 148(1)
-; CHECK-NEXT: lwz 3, 128(1)
-; CHECK-NEXT: stfd 31, 168(1) # 8-byte Folded Spill
-; CHECK-NEXT: stw 3, 144(1)
-; CHECK-NEXT: lwz 3, 140(1)
-; CHECK-NEXT: lfd 0, 144(1)
-; CHECK-NEXT: stw 3, 156(1)
-; CHECK-NEXT: lwz 3, 136(1)
-; CHECK-NEXT: stw 30, 160(1) # 4-byte Folded Spill
-; CHECK-NEXT: li 30, 2048
-; CHECK-NEXT: stw 3, 152(1)
-; CHECK-NEXT: lfd 31, 152(1)
-; CHECK-NEXT: fmr 1, 31
+; CHECK-NEXT: stwu 1, -112(1)
+; CHECK-NEXT: stfd 2, 80(1)
+; CHECK-NEXT: li 3, 2048
+; CHECK-NEXT: lwz 4, 84(1)
+; CHECK-NEXT: stfd 1, 88(1)
+; CHECK-NEXT: stw 4, 100(1)
+; CHECK-NEXT: lwz 4, 80(1)
+; CHECK-NEXT: stw 4, 96(1)
+; CHECK-NEXT: lwz 4, 92(1)
+; CHECK-NEXT: lfd 1, 96(1)
+; CHECK-NEXT: stw 4, 108(1)
+; CHECK-NEXT: lwz 4, 88(1)
+; CHECK-NEXT: stw 4, 104(1)
+; CHECK-NEXT: lfd 0, 104(1)
+; CHECK-NEXT: mtctr 3
+; CHECK-NEXT: fmr 2, 0
+; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_1: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: stfd 1, 64(1)
-; CHECK-NEXT: lwz 3, 68(1)
-; CHECK-NEXT: stfd 31, 88(1)
-; CHECK-NEXT: stw 3, 84(1)
-; CHECK-NEXT: lwz 3, 64(1)
-; CHECK-NEXT: stfd 0, 56(1)
-; CHECK-NEXT: stw 3, 80(1)
-; CHECK-NEXT: lwz 3, 92(1)
-; CHECK-NEXT: lfd 4, 96(1)
-; CHECK-NEXT: stw 3, 108(1)
-; CHECK-NEXT: lwz 3, 88(1)
-; CHECK-NEXT: lfd 1, 80(1)
-; CHECK-NEXT: stw 3, 104(1)
; CHECK-NEXT: lwz 3, 60(1)
-; CHECK-NEXT: lfd 3, 104(1)
+; CHECK-NEXT: stfd 1, 48(1)
; CHECK-NEXT: stw 3, 76(1)
; CHECK-NEXT: lwz 3, 56(1)
; CHECK-NEXT: stw 3, 72(1)
-; CHECK-NEXT: lfd 2, 72(1)
-; CHECK-NEXT: bl copysignl
-; CHECK-NEXT: stfd 2, 48(1)
-; CHECK-NEXT: addi 30, 30, -1
; CHECK-NEXT: lwz 3, 52(1)
-; CHECK-NEXT: cmplwi 30, 0
-; CHECK-NEXT: stfd 1, 40(1)
-; CHECK-NEXT: stw 3, 116(1)
+; CHECK-NEXT: lfd 2, 72(1)
+; CHECK-NEXT: stw 3, 68(1)
; CHECK-NEXT: lwz 3, 48(1)
-; CHECK-NEXT: stw 3, 112(1)
-; CHECK-NEXT: lwz 3, 44(1)
-; CHECK-NEXT: lfd 0, 112(1)
-; CHECK-NEXT: stw 3, 124(1)
-; CHECK-NEXT: lwz 3, 40(1)
-; CHECK-NEXT: stw 3, 120(1)
-; CHECK-NEXT: lfd 1, 120(1)
-; CHECK-NEXT: bc 12, 1, .LBB0_1
-; CHECK-NEXT: # %bb.2: # %for.end
-; CHECK-NEXT: stfd 1, 16(1)
+; CHECK-NEXT: stw 3, 64(1)
+; CHECK-NEXT: lfd 1, 64(1)
+; CHECK-NEXT: bdz .LBB0_7
+; CHECK-NEXT: .LBB0_2: # %for.body
+; CHECK-NEXT: #
+; CHECK-NEXT: stfd 0, 40(1)
+; CHECK-NEXT: lbz 3, 40(1)
+; CHECK-NEXT: srwi 3, 3, 7
+; CHECK-NEXT: andi. 3, 3, 1
+; CHECK-NEXT: bc 12, 1, .LBB0_4
+; CHECK-NEXT: # %bb.3: # %for.body
+; CHECK-NEXT: #
+; CHECK-NEXT: fabs 3, 2
+; CHECK-NEXT: b .LBB0_5
+; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: fnabs 3, 2
+; CHECK-NEXT: .LBB0_5: # %for.body
+; CHECK-NEXT: #
+; CHECK-NEXT: fcmpu 0, 2, 3
+; CHECK-NEXT: stfd 3, 56(1)
+; CHECK-NEXT: beq 0, .LBB0_1
+; CHECK-NEXT: # %bb.6: # %for.body
+; CHECK-NEXT: #
+; CHECK-NEXT: fneg 1, 1
+; CHECK-NEXT: b .LBB0_1
+; CHECK-NEXT: .LBB0_7: # %for.end
+; CHECK-NEXT: stfd 2, 16(1)
; CHECK-NEXT: lwz 3, 20(1)
-; CHECK-NEXT: stfd 0, 8(1)
+; CHECK-NEXT: stfd 1, 8(1)
; CHECK-NEXT: stw 3, 36(1)
; CHECK-NEXT: lwz 3, 16(1)
-; CHECK-NEXT: lfd 31, 168(1) # 8-byte Folded Reload
; CHECK-NEXT: stw 3, 32(1)
; CHECK-NEXT: lwz 3, 12(1)
; CHECK-NEXT: lfd 1, 32(1)
; CHECK-NEXT: stw 3, 28(1)
; CHECK-NEXT: lwz 3, 8(1)
-; CHECK-NEXT: lwz 30, 160(1) # 4-byte Folded Reload
; CHECK-NEXT: stw 3, 24(1)
; CHECK-NEXT: lfd 2, 24(1)
-; CHECK-NEXT: lwz 0, 180(1)
-; CHECK-NEXT: addi 1, 1, 176
-; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: addi 1, 1, 112
; CHECK-NEXT: blr
entry:
br label %for.body
@@ -103,4 +100,3 @@ for.end: ; preds = %for.body
declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) #0
; CHECK-NOT: mtctr
-
diff --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
index ebec8c1c4d6543..967e6cf679d4c8 100644
--- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
+++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
@@ -89,100 +89,59 @@ entry:
}
define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind {
-; PPC64-P8-LE-LABEL: test_copysign:
-; PPC64-P8-LE: # %bb.0: # %entry
-; PPC64-P8-LE-NEXT: mflr 0
-; PPC64-P8-LE-NEXT: stdu 1, -32(1)
-; PPC64-P8-LE-NEXT: std 0, 48(1)
-; PPC64-P8-LE-NEXT: bl copysignl
-; PPC64-P8-LE-NEXT: nop
-; PPC64-P8-LE-NEXT: mffprd 3, 1
-; PPC64-P8-LE-NEXT: mffprd 4, 2
-; PPC64-P8-LE-NEXT: addi 1, 1, 32
-; PPC64-P8-LE-NEXT: ld 0, 16(1)
-; PPC64-P8-LE-NEXT: mtlr 0
-; PPC64-P8-LE-NEXT: blr
-;
-; PPC64-LE-LABEL: test_copysign:
-; PPC64-LE: # %bb.0: # %entry
-; PPC64-LE-NEXT: mflr 0
-; PPC64-LE-NEXT: stdu 1, -48(1)
-; PPC64-LE-NEXT: std 0, 64(1)
-; PPC64-LE-NEXT: bl copysignl
-; PPC64-LE-NEXT: nop
-; PPC64-LE-NEXT: stfd 1, 32(1)
-; PPC64-LE-NEXT: stfd 2, 40(1)
-; PPC64-LE-NEXT: ld 3, 32(1)
-; PPC64-LE-NEXT: ld 4, 40(1)
-; PPC64-LE-NEXT: addi 1, 1, 48
-; PPC64-LE-NEXT: ld 0, 16(1)
-; PPC64-LE-NEXT: mtlr 0
-; PPC64-LE-NEXT: blr
-;
-; PPC64-P8-BE-LABEL: test_copysign:
-; PPC64-P8-BE: # %bb.0: # %entry
-; PPC64-P8-BE-NEXT: mflr 0
-; PPC64-P8-BE-NEXT: stdu 1, -112(1)
-; PPC64-P8-BE-NEXT: std 0, 128(1)
-; PPC64-P8-BE-NEXT: bl copysignl
-; PPC64-P8-BE-NEXT: nop
-; PPC64-P8-BE-NEXT: mffprd 3, 1
-; PPC64-P8-BE-NEXT: mffprd 4, 2
-; PPC64-P8-BE-NEXT: addi 1, 1, 112
-; PPC64-P8-BE-NEXT: ld 0, 16(1)
-; PPC64-P8-BE-NEXT: mtlr 0
-; PPC64-P8-BE-NEXT: blr
+; PPC64-P8-LABEL: test_copysign:
+; PPC64-P8: # %bb.0: # %entry
+; PPC64-P8-NEXT: xscpsgndp 0, 3, 1
+; PPC64-P8-NEXT: xscmpudp 0, 1, 0
+; PPC64-P8-NEXT: beq 0, .LBB2_2
+; PPC64-P8-NEXT: # %bb.1: # %entry
+; PPC64-P8-NEXT: xsnegdp 2, 2
+; PPC64-P8-NEXT: .LBB2_2: # %entry
+; PPC64-P8-NEXT: mffprd 3, 0
+; PPC64-P8-NEXT: mffprd 4, 2
+; PPC64-P8-NEXT: blr
;
-; PPC64-BE-LABEL: test_copysign:
-; PPC64-BE: # %bb.0: # %entry
-; PPC64-BE-NEXT: mflr 0
-; PPC64-BE-NEXT: stdu 1, -128(1)
-; PPC64-BE-NEXT: std 0, 144(1)
-; PPC64-BE-NEXT: bl copysignl
-; PPC64-BE-NEXT: nop
-; PPC64-BE-NEXT: stfd 1, 112(1)
-; PPC64-BE-NEXT: stfd 2, 120(1)
-; PPC64-BE-NEXT: ld 3, 112(1)
-; PPC64-BE-NEXT: ld 4, 120(1)
-; PPC64-BE-NEXT: addi 1, 1, 128
-; PPC64-BE-NEXT: ld 0, 16(1)
-; PPC64-BE-NEXT: mtlr 0
-; PPC64-BE-NEXT: blr
+; PPC64-LABEL: test_copysign:
+; PPC64: # %bb.0: # %entry
+; PPC64-NEXT: xscpsgndp 0, 3, 1
+; PPC64-NEXT: xscmpudp 0, 1, 0
+; PPC64-NEXT: beq 0, .LBB2_2
+; PPC64-NEXT: # %bb.1: # %entry
+; PPC64-NEXT: xsnegdp 2, 2
+; PPC64-NEXT: .LBB2_2: # %entry
+; PPC64-NEXT: stfd 0, -16(1)
+; PPC64-NEXT: stfd 2, -8(1)
+; PPC64-NEXT: ld 3, -16(1)
+; PPC64-NEXT: ld 4, -8(1)
+; PPC64-NEXT: blr
;
; PPC32-LABEL: test_copysign:
; PPC32: # %bb.0: # %entry
-; PPC32-NEXT: mflr 0
-; PPC32-NEXT: stwu 1, -80(1)
-; PPC32-NEXT: stw 0, 84(1)
-; PPC32-NEXT: stfd 1, 32(1)
-; PPC32-NEXT: lwz 3, 36(1)
-; PPC32-NEXT: stfd 2, 24(1)
-; PPC32-NEXT: stw 3, 52(1)
-; PPC32-NEXT: lwz 3, 32(1)
-; PPC32-NEXT: stfd 3, 56(1)
-; PPC32-NEXT: stw 3, 48(1)
-; PPC32-NEXT: lwz 3, 28(1)
-; PPC32-NEXT: lfd 4, 64(1)
-; PPC32-NEXT: stw 3, 44(1)
-; PPC32-NEXT: lwz 3, 24(1)
-; PPC32-NEXT: lfd 1, 48(1)
-; PPC32-NEXT: stw 3, 40(1)
-; PPC32-NEXT: lwz 3, 60(1)
-; PPC32-NEXT: lfd 2, 40(1)
-; PPC32-NEXT: stw 3, 76(1)
-; PPC32-NEXT: lwz 3, 56(1)
-; PPC32-NEXT: stw 3, 72(1)
-; PPC32-NEXT: lfd 3, 72(1)
-; PPC32-NEXT: bl copysignl
-; PPC32-NEXT: stfd 1, 8(1)
+; PPC32-NEXT: stwu 1, -32(1)
+; PPC32-NEXT: stfd 3, 8(1)
+; PPC32-NEXT: lbz 3, 8(1)
+; PPC32-NEXT: srwi 3, 3, 7
+; PPC32-NEXT: andi. 3, 3, 1
+; PPC32-NEXT: bc 12, 1, .LBB2_2
+; PPC32-NEXT: # %bb.1: # %entry
+; PPC32-NEXT: fabs 0, 1
+; PPC32-NEXT: fcmpu 0, 1, 0
+; PPC32-NEXT: bne 0, .LBB2_3
+; PPC32-NEXT: b .LBB2_4
+; PPC32-NEXT: .LBB2_2:
+; PPC32-NEXT: fnabs 0, 1
+; PPC32-NEXT: fcmpu 0, 1, 0
+; PPC32-NEXT: beq 0, .LBB2_4
+; PPC32-NEXT: .LBB2_3: # %entry
+; PPC32-NEXT: fneg 2, 2
+; PPC32-NEXT: .LBB2_4: # %entry
+; PPC32-NEXT: stfd 0, 24(1)
; PPC32-NEXT: stfd 2, 16(1)
-; PPC32-NEXT: lwz 3, 8(1)
-; PPC32-NEXT: lwz 4, 12(1)
+; PPC32-NEXT: lwz 3, 24(1)
+; PPC32-NEXT: lwz 4, 28(1)
; PPC32-NEXT: lwz 5, 16(1)
; PPC32-NEXT: lwz 6, 20(1)
-; PPC32-NEXT: lwz 0, 84(1)
-; PPC32-NEXT: addi 1, 1, 80
-; PPC32-NEXT: mtlr 0
+; PPC32-NEXT: addi 1, 1, 32
; PPC32-NEXT: blr
entry:
%0 = tail call ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128 %x, ppc_fp128 %y)
@@ -236,3 +195,8 @@ entry:
declare ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128)
declare ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128, ppc_fp128)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; PPC64-BE: {{.*}}
+; PPC64-LE: {{.*}}
+; PPC64-P8-BE: {{.*}}
+; PPC64-P8-LE: {{.*}}
From bd017ccac36fe397e2f12bd355f8c7a0c55420c9 Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee at gmail.com>
Date: Tue, 29 Oct 2024 02:57:15 -0700
Subject: [PATCH 5/5] [PowerPC][SelectionDAG] Use SETUEQ in copysign expansion
---
.../SelectionDAG/LegalizeFloatTypes.cpp | 2 +-
llvm/test/CodeGen/PowerPC/copysignl.ll | 45 ++++++++++---------
llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll | 3 +-
.../PowerPC/fp128-bitcast-after-operation.ll | 24 +++++-----
4 files changed, 40 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index e30d9b1a0f31fa..b7d512f5a1a4b4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1721,7 +1721,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
// A double-double is Hi + Lo, so if Hi flips sign, so must Lo
Lo = DAG.getSelectCC(DL, Tmp, Hi, Lo,
DAG.getNode(ISD::FNEG, DL, Lo.getValueType(), Lo),
- ISD::SETEQ);
+ ISD::SETUEQ);
}
void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
diff --git a/llvm/test/CodeGen/PowerPC/copysignl.ll b/llvm/test/CodeGen/PowerPC/copysignl.ll
index 3b865a083dca5e..9cefe66df90bd9 100644
--- a/llvm/test/CodeGen/PowerPC/copysignl.ll
+++ b/llvm/test/CodeGen/PowerPC/copysignl.ll
@@ -44,13 +44,13 @@ define ppc_fp128 @foo_ll(double %a, ppc_fp128 %b) #0 {
; CHECK-LABEL: foo_ll:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: fcpsgn 0, 2, 1
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: li 4, 8
-; CHECK-NEXT: fcmpu 0, 1, 0
-; CHECK-NEXT: fmr 1, 0
-; CHECK-NEXT: iseleq 3, 4, 3
+; CHECK-NEXT: li 3, 8
; CHECK-NEXT: addis 4, 2, .LCPI2_0@toc@ha
; CHECK-NEXT: addi 4, 4, .LCPI2_0@toc@l
+; CHECK-NEXT: fcmpu 0, 1, 0
+; CHECK-NEXT: fmr 1, 0
+; CHECK-NEXT: crnor 20, 2, 3
+; CHECK-NEXT: isel 3, 0, 3, 20
; CHECK-NEXT: lfdx 2, 4, 3
; CHECK-NEXT: blr
;
@@ -59,8 +59,9 @@ define ppc_fp128 @foo_ll(double %a, ppc_fp128 %b) #0 {
; CHECK-VSX-NEXT: fmr 0, 1
; CHECK-VSX-NEXT: xscpsgndp 1, 2, 1
; CHECK-VSX-NEXT: xxlxor 2, 2, 2
-; CHECK-VSX-NEXT: xscmpudp 0, 0, 1
-; CHECK-VSX-NEXT: beqlr 0
+; CHECK-VSX-NEXT: fcmpu 0, 0, 1
+; CHECK-VSX-NEXT: cror 20, 2, 3
+; CHECK-VSX-NEXT: bclr 12, 20, 0
; CHECK-VSX-NEXT: # %bb.1: # %entry
; CHECK-VSX-NEXT: xsnegdp 2, 2
; CHECK-VSX-NEXT: blr
@@ -74,13 +75,13 @@ define ppc_fp128 @foo_ld(double %a, double %b) #0 {
; CHECK-LABEL: foo_ld:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: fcpsgn 0, 2, 1
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: li 4, 8
-; CHECK-NEXT: fcmpu 0, 1, 0
-; CHECK-NEXT: fmr 1, 0
-; CHECK-NEXT: iseleq 3, 4, 3
+; CHECK-NEXT: li 3, 8
; CHECK-NEXT: addis 4, 2, .LCPI3_0@toc@ha
; CHECK-NEXT: addi 4, 4, .LCPI3_0@toc@l
+; CHECK-NEXT: fcmpu 0, 1, 0
+; CHECK-NEXT: fmr 1, 0
+; CHECK-NEXT: crnor 20, 2, 3
+; CHECK-NEXT: isel 3, 0, 3, 20
; CHECK-NEXT: lfdx 2, 4, 3
; CHECK-NEXT: blr
;
@@ -89,8 +90,9 @@ define ppc_fp128 @foo_ld(double %a, double %b) #0 {
; CHECK-VSX-NEXT: fmr 0, 1
; CHECK-VSX-NEXT: xscpsgndp 1, 2, 1
; CHECK-VSX-NEXT: xxlxor 2, 2, 2
-; CHECK-VSX-NEXT: xscmpudp 0, 0, 1
-; CHECK-VSX-NEXT: beqlr 0
+; CHECK-VSX-NEXT: fcmpu 0, 0, 1
+; CHECK-VSX-NEXT: cror 20, 2, 3
+; CHECK-VSX-NEXT: bclr 12, 20, 0
; CHECK-VSX-NEXT: # %bb.1: # %entry
; CHECK-VSX-NEXT: xsnegdp 2, 2
; CHECK-VSX-NEXT: blr
@@ -105,13 +107,13 @@ define ppc_fp128 @foo_lf(double %a, float %b) #0 {
; CHECK-LABEL: foo_lf:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: fcpsgn 0, 2, 1
-; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: li 4, 8
-; CHECK-NEXT: fcmpu 0, 1, 0
-; CHECK-NEXT: fmr 1, 0
-; CHECK-NEXT: iseleq 3, 4, 3
+; CHECK-NEXT: li 3, 8
; CHECK-NEXT: addis 4, 2, .LCPI4_0@toc@ha
; CHECK-NEXT: addi 4, 4, .LCPI4_0@toc@l
+; CHECK-NEXT: fcmpu 0, 1, 0
+; CHECK-NEXT: fmr 1, 0
+; CHECK-NEXT: crnor 20, 2, 3
+; CHECK-NEXT: isel 3, 0, 3, 20
; CHECK-NEXT: lfdx 2, 4, 3
; CHECK-NEXT: blr
;
@@ -120,8 +122,9 @@ define ppc_fp128 @foo_lf(double %a, float %b) #0 {
; CHECK-VSX-NEXT: fmr 0, 1
; CHECK-VSX-NEXT: fcpsgn 1, 2, 1
; CHECK-VSX-NEXT: xxlxor 2, 2, 2
-; CHECK-VSX-NEXT: xscmpudp 0, 0, 1
-; CHECK-VSX-NEXT: beqlr 0
+; CHECK-VSX-NEXT: fcmpu 0, 0, 1
+; CHECK-VSX-NEXT: cror 20, 2, 3
+; CHECK-VSX-NEXT: bclr 12, 20, 0
; CHECK-VSX-NEXT: # %bb.1: # %entry
; CHECK-VSX-NEXT: xsnegdp 2, 2
; CHECK-VSX-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
index ff8311c131f76a..82f8ba3bfda53b 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
@@ -59,8 +59,9 @@ define ppc_fp128 @foo(ptr nocapture %n, ppc_fp128 %d) nounwind readonly {
; CHECK-NEXT: .LBB0_5: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: fcmpu 0, 2, 3
+; CHECK-NEXT: cror 20, 2, 3
; CHECK-NEXT: stfd 3, 56(1)
-; CHECK-NEXT: beq 0, .LBB0_1
+; CHECK-NEXT: bc 12, 20, .LBB0_1
; CHECK-NEXT: # %bb.6: # %for.body
; CHECK-NEXT: #
; CHECK-NEXT: fneg 1, 1
diff --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
index 967e6cf679d4c8..c51b98de5cdb04 100644
--- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
+++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
@@ -92,20 +92,22 @@ define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind {
; PPC64-P8-LABEL: test_copysign:
; PPC64-P8: # %bb.0: # %entry
; PPC64-P8-NEXT: xscpsgndp 0, 3, 1
-; PPC64-P8-NEXT: xscmpudp 0, 1, 0
-; PPC64-P8-NEXT: beq 0, .LBB2_2
+; PPC64-P8-NEXT: fcmpu 0, 1, 0
+; PPC64-P8-NEXT: cror 20, 2, 3
+; PPC64-P8-NEXT: bc 12, 20, .LBB2_2
; PPC64-P8-NEXT: # %bb.1: # %entry
; PPC64-P8-NEXT: xsnegdp 2, 2
; PPC64-P8-NEXT: .LBB2_2: # %entry
-; PPC64-P8-NEXT: mffprd 3, 0
; PPC64-P8-NEXT: mffprd 4, 2
+; PPC64-P8-NEXT: mffprd 3, 0
; PPC64-P8-NEXT: blr
;
; PPC64-LABEL: test_copysign:
; PPC64: # %bb.0: # %entry
; PPC64-NEXT: xscpsgndp 0, 3, 1
-; PPC64-NEXT: xscmpudp 0, 1, 0
-; PPC64-NEXT: beq 0, .LBB2_2
+; PPC64-NEXT: fcmpu 0, 1, 0
+; PPC64-NEXT: cror 20, 2, 3
+; PPC64-NEXT: bc 12, 20, .LBB2_2
; PPC64-NEXT: # %bb.1: # %entry
; PPC64-NEXT: xsnegdp 2, 2
; PPC64-NEXT: .LBB2_2: # %entry
@@ -125,16 +127,16 @@ define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind {
; PPC32-NEXT: bc 12, 1, .LBB2_2
; PPC32-NEXT: # %bb.1: # %entry
; PPC32-NEXT: fabs 0, 1
-; PPC32-NEXT: fcmpu 0, 1, 0
-; PPC32-NEXT: bne 0, .LBB2_3
-; PPC32-NEXT: b .LBB2_4
+; PPC32-NEXT: b .LBB2_3
; PPC32-NEXT: .LBB2_2:
; PPC32-NEXT: fnabs 0, 1
-; PPC32-NEXT: fcmpu 0, 1, 0
-; PPC32-NEXT: beq 0, .LBB2_4
; PPC32-NEXT: .LBB2_3: # %entry
+; PPC32-NEXT: fcmpu 0, 1, 0
+; PPC32-NEXT: cror 20, 2, 3
+; PPC32-NEXT: bc 12, 20, .LBB2_5
+; PPC32-NEXT: # %bb.4: # %entry
; PPC32-NEXT: fneg 2, 2
-; PPC32-NEXT: .LBB2_4: # %entry
+; PPC32-NEXT: .LBB2_5: # %entry
; PPC32-NEXT: stfd 0, 24(1)
; PPC32-NEXT: stfd 2, 16(1)
; PPC32-NEXT: lwz 3, 24(1)
More information about the llvm-commits
mailing list