[llvm] [LLVM] Slay undead copysign code (PR #111269)

Tue Oct 29 03:03:51 PDT 2024

https://github.com/workingjubilee updated https://github.com/llvm/llvm-project/pull/111269

>From e83906437407636df5867869d50508683588ede4 Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee at gmail.com>
Date: Sat, 5 Oct 2024 15:42:49 -0700
Subject: [PATCH 1/7] [Codegen] Demand llvm.copysign.f{16,32,64,80,128} lowers
 without libcalls

This makes real what is already true:
Copysign does not ever need to lower to runtime libcalls!
It should always be possible to implement the operation via bitops.
---
 llvm/lib/CodeGen/IntrinsicLowering.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 256c081b46e262..f8833c0a811c11 100644
--- a/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -438,7 +438,15 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     break;
   }
   case Intrinsic::copysign: {
-    ReplaceFPIntrinsicWithCall(CI, "copysignf", "copysign", "copysignl");
+    switch (CI->getArgOperand(0)->getType()->getTypeID()) {
+    default:
+      report_fatal_error("copysign intrinsic without arch-specific floats "
+                         "reached intrinsic-to-libcall lowering");
+      break;
+    case Type::PPC_FP128TyID:
+      ReplaceCallWith("copysignl", CI, CI->arg_begin(), CI->arg_end(),
+                      Type::getFloatTy(CI->getContext()));
+    }
     break;
   }
   case Intrinsic::get_rounding:

>From 89f095a5bfa9d3711b8197bd9f268295552c37aa Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee at gmail.com>
Date: Sat, 5 Oct 2024 15:47:58 -0700
Subject: [PATCH 2/7] [IR] Remove COPYSIGN_F{32,64,80,128} from runtime
 libcalls

This reduces the burden on frontends that wish to support float ops
without needing a C compiler to build LLVM's compiler-rt for that target,
e.g. so that they can be a fully self-contained toolchain for bare-metal.

Unfortunately, the PowerPC copysignl libcall (COPYSIGN_PPCF128) has to stay in place for now.
---
 llvm/include/llvm/IR/RuntimeLibcalls.def                      | 4 ----
 llvm/lib/IR/RuntimeLibcalls.cpp                               | 1 -
 llvm/lib/Target/SystemZ/ZOSLibcallNames.def                   | 3 ---
 .../WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp       | 3 ---
 4 files changed, 11 deletions(-)

diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 69cf43140ad4bd..603943a5fd2fbb 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -284,10 +284,6 @@ HANDLE_LIBCALL(FLOOR_F64, "floor")
 HANDLE_LIBCALL(FLOOR_F80, "floorl")
 HANDLE_LIBCALL(FLOOR_F128, "floorl")
 HANDLE_LIBCALL(FLOOR_PPCF128, "floorl")
-HANDLE_LIBCALL(COPYSIGN_F32, "copysignf")
-HANDLE_LIBCALL(COPYSIGN_F64, "copysign")
-HANDLE_LIBCALL(COPYSIGN_F80, "copysignl")
-HANDLE_LIBCALL(COPYSIGN_F128, "copysignl")
 HANDLE_LIBCALL(COPYSIGN_PPCF128, "copysignl")
 HANDLE_LIBCALL(FMIN_F32, "fminf")
 HANDLE_LIBCALL(FMIN_F64, "fmin")
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index d806f8093459ee..ca78892ec78838 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -61,7 +61,6 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
     setLibcallName(RTLIB::ROUND_F128, "roundf128");
     setLibcallName(RTLIB::ROUNDEVEN_F128, "roundevenf128");
     setLibcallName(RTLIB::FLOOR_F128, "floorf128");
-    setLibcallName(RTLIB::COPYSIGN_F128, "copysignf128");
     setLibcallName(RTLIB::FMIN_F128, "fminf128");
     setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
     setLibcallName(RTLIB::LROUND_F128, "lroundf128");
diff --git a/llvm/lib/Target/SystemZ/ZOSLibcallNames.def b/llvm/lib/Target/SystemZ/ZOSLibcallNames.def
index 12a01522a7e643..a53c9618696fcc 100644
--- a/llvm/lib/Target/SystemZ/ZOSLibcallNames.def
+++ b/llvm/lib/Target/SystemZ/ZOSLibcallNames.def
@@ -87,9 +87,6 @@ HANDLE_LIBCALL(EXP2_F128, "@@LXP2@B")
 HANDLE_LIBCALL(COS_F64, "@@SCOS@B")
 HANDLE_LIBCALL(COS_F32, "@@FCOS@B")
 HANDLE_LIBCALL(COS_F128, "@@LCOS@B")
-HANDLE_LIBCALL(COPYSIGN_F64, "@@DCPY@B")
-HANDLE_LIBCALL(COPYSIGN_F32, "@@FCPY@B")
-HANDLE_LIBCALL(COPYSIGN_F128, "@@LCPY@B")
 HANDLE_LIBCALL(CEIL_F64, "@@SCEL@B")
 HANDLE_LIBCALL(CEIL_F32, "@@FCEL@B")
 HANDLE_LIBCALL(CEIL_F128, "@@LCEL@B")
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index ba3ab5164af267..13c8a6fe1524fc 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -261,9 +261,6 @@ struct RuntimeLibcallSignatureTable {
     Table[RTLIB::FLOOR_F32] = f32_func_f32;
     Table[RTLIB::FLOOR_F64] = f64_func_f64;
     Table[RTLIB::FLOOR_F128] = i64_i64_func_i64_i64;
-    Table[RTLIB::COPYSIGN_F32] = f32_func_f32_f32;
-    Table[RTLIB::COPYSIGN_F64] = f64_func_f64_f64;
-    Table[RTLIB::COPYSIGN_F128] = i64_i64_func_i64_i64_i64_i64;
     Table[RTLIB::FMIN_F32] = f32_func_f32_f32;
     Table[RTLIB::FMIN_F64] = f64_func_f64_f64;
     Table[RTLIB::FMIN_F128] = i64_i64_func_i64_i64_i64_i64;

>From f68c342745babc52bcc51101e1416dd8a88596eb Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee at gmail.com>
Date: Sat, 5 Oct 2024 17:06:00 -0700
Subject: [PATCH 3/7] [SelectionDAG] Only lower COPYSIGN for ppcf128 to
 copysignl

All other floats are expanded for all current architectures just fine.
PowerPC, however, does not efficiently legalize its very own float.
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 2c81c829e75cbb..f8c0e4c6a0831d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1649,12 +1649,12 @@ void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
 
 void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
                                                 SDValue &Lo, SDValue &Hi) {
-  ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
-                                        RTLIB::COPYSIGN_F32,
-                                        RTLIB::COPYSIGN_F64,
-                                        RTLIB::COPYSIGN_F80,
-                                        RTLIB::COPYSIGN_F128,
-                                        RTLIB::COPYSIGN_PPCF128), Lo, Hi);
+
+  EVT VT = N->getValueType(0);
+  ExpandFloatRes_Binary(
+      N,
+      (VT == MVT::ppcf128 ? RTLIB::COPYSIGN_PPCF128 : RTLIB::UNKNOWN_LIBCALL),
+      Lo, Hi);
 }
 
 void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,

>From 7ec1b1c0ecf7d39086b50a0c24a8f08c348fb7da Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee at gmail.com>
Date: Sat, 12 Oct 2024 01:31:45 -0700
Subject: [PATCH 4/7] [SelectionDAG] Expand `@llvm.copysign.ppc_fp128` without
 copysignl

---
 .../CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index f8c0e4c6a0831d..d0c4764732a4ea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1650,11 +1650,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
 void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
                                                 SDValue &Lo, SDValue &Hi) {
 
-  EVT VT = N->getValueType(0);
-  ExpandFloatRes_Binary(
-      N,
-      (VT == MVT::ppcf128 ? RTLIB::COPYSIGN_PPCF128 : RTLIB::UNKNOWN_LIBCALL),
-      Lo, Hi);
+  assert(N->getValueType(0) == MVT::ppcf128 &&
+         "Logic only correct for ppcf128!");
+  SDLoc DL = SDLoc(N);
+  SDValue Tmp = SDValue();
+  GetExpandedFloat(N->getOperand(0), Lo, Tmp);
+
+  Hi = DAG.getNode(ISD::FCOPYSIGN, DL, Tmp.getValueType(), Tmp,
+                   N->getOperand(1));
+  // a double-double is Hi + Lo, so if Hi flips sign, so must Lo
+  Lo = DAG.getSelectCC(DL, Tmp, Hi, Lo,
+                       DAG.getNode(ISD::FNEG, DL, Lo.getValueType(), Lo),
+                       ISD::SETEQ);
 }
 
 void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,

>From 555b0cdf2a65f8f427a183adea5d44fb49b8e51c Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee at gmail.com>
Date: Sat, 12 Oct 2024 04:06:06 -0700
Subject: [PATCH 5/7] [PowerPC] Rip out remaining copysignl infra

---
 llvm/include/llvm/IR/RuntimeLibcalls.def |  1 -
 llvm/lib/CodeGen/IntrinsicLowering.cpp   | 12 ------------
 2 files changed, 13 deletions(-)

diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 603943a5fd2fbb..df4002d6dcbc55 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -284,7 +284,6 @@ HANDLE_LIBCALL(FLOOR_F64, "floor")
 HANDLE_LIBCALL(FLOOR_F80, "floorl")
 HANDLE_LIBCALL(FLOOR_F128, "floorl")
 HANDLE_LIBCALL(FLOOR_PPCF128, "floorl")
-HANDLE_LIBCALL(COPYSIGN_PPCF128, "copysignl")
 HANDLE_LIBCALL(FMIN_F32, "fminf")
 HANDLE_LIBCALL(FMIN_F64, "fmin")
 HANDLE_LIBCALL(FMIN_F80, "fminl")
diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp
index f8833c0a811c11..a08628ad3dd2fb 100644
--- a/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -437,18 +437,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     ReplaceFPIntrinsicWithCall(CI, "roundevenf", "roundeven", "roundevenl");
     break;
   }
-  case Intrinsic::copysign: {
-    switch (CI->getArgOperand(0)->getType()->getTypeID()) {
-    default:
-      report_fatal_error("copysign intrinsic without arch-specific floats "
-                         "reached intrinsic-to-libcall lowering");
-      break;
-    case Type::PPC_FP128TyID:
-      ReplaceCallWith("copysignl", CI, CI->arg_begin(), CI->arg_end(),
-                      Type::getFloatTy(CI->getContext()));
-    }
-    break;
-  }
   case Intrinsic::get_rounding:
      // Lower to "round to the nearest"
      if (!CI->getType()->isVoidTy())

>From aeb4a6e5a1b48094104dfd331edb917bbd75f4b4 Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee at gmail.com>
Date: Tue, 29 Oct 2024 02:56:15 -0700
Subject: [PATCH 6/7] Fixup tests for no libcalls for copysignl

---
 llvm/test/CodeGen/PowerPC/copysignl.ll        | 117 +++++++++++----
 llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll    |  28 ----
 .../PowerPC/fp128-bitcast-after-operation.ll  | 138 +++++++-----------
 3 files changed, 138 insertions(+), 145 deletions(-)
 delete mode 100644 llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll

diff --git a/llvm/test/CodeGen/PowerPC/copysignl.ll b/llvm/test/CodeGen/PowerPC/copysignl.ll
index 427826daa2c638..2a52825158c5ee 100644
--- a/llvm/test/CodeGen/PowerPC/copysignl.ll
+++ b/llvm/test/CodeGen/PowerPC/copysignl.ll
@@ -4,79 +4,136 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "powerpc64-unknown-linux-gnu"
 
 define double @foo_d_ll(ppc_fp128 %a, ppc_fp128 %b) #0 {
+; CHECK-LABEL: foo_d_ll:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fcpsgn 1, 3, 1
+; CHECK-NEXT:    blr
+;
+; CHECK-VSX-LABEL: foo_d_ll:
+; CHECK-VSX:       # %bb.0: # %entry
+; CHECK-VSX-NEXT:    xscpsgndp 1, 3, 1
+; CHECK-VSX-NEXT:    blr
 entry:
   %call = tail call ppc_fp128 @copysignl(ppc_fp128 %a, ppc_fp128 %b) #0
   %conv = fptrunc ppc_fp128 %call to double
   ret double %conv
 
-; CHECK-LABEL: @foo_d_ll
-; CHECK: fcpsgn 1, 3, 1
-; CHECK: blr
-; CHECK-VSX-LABEL: @foo_d_ll
-; CHECK-VSX: xscpsgndp 1, 3, 1
-; CHECK-VSX: blr
 }
 
 declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) #0
 
 define double @foo_dl(double %a, ppc_fp128 %b) #0 {
+; CHECK-LABEL: foo_dl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fcpsgn 1, 2, 1
+; CHECK-NEXT:    blr
+;
+; CHECK-VSX-LABEL: foo_dl:
+; CHECK-VSX:       # %bb.0: # %entry
+; CHECK-VSX-NEXT:    xscpsgndp 1, 2, 1
+; CHECK-VSX-NEXT:    blr
 entry:
   %conv = fptrunc ppc_fp128 %b to double
   %call = tail call double @copysign(double %a, double %conv) #0
   ret double %call
 
-; CHECK-LABEL: @foo_dl
-; CHECK: fcpsgn 1, 2, 1
-; CHECK: blr
-; CHECK-VSX-LABEL: @foo_dl
-; CHECK-VSX: xscpsgndp 1, 2, 1
-; CHECK-VSX: blr
 }
 
 declare double @copysign(double, double) #0
 
 define ppc_fp128 @foo_ll(double %a, ppc_fp128 %b) #0 {
+; CHECK-LABEL: foo_ll:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fcpsgn 0, 2, 1
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    li 4, 8
+; CHECK-NEXT:    fcmpu 0, 1, 0
+; CHECK-NEXT:    fmr 1, 0
+; CHECK-NEXT:    iseleq 3, 4, 3
+; CHECK-NEXT:    addis 4, 2, .LCPI2_0@toc@ha
+; CHECK-NEXT:    addi 4, 4, .LCPI2_0@toc@l
+; CHECK-NEXT:    lfdx 2, 4, 3
+; CHECK-NEXT:    blr
+;
+; CHECK-VSX-LABEL: foo_ll:
+; CHECK-VSX:       # %bb.0: # %entry
+; CHECK-VSX-NEXT:    fmr 0, 1
+; CHECK-VSX-NEXT:    xscpsgndp 1, 2, 1
+; CHECK-VSX-NEXT:    xxlxor 2, 2, 2
+; CHECK-VSX-NEXT:    xscmpudp 0, 0, 1
+; CHECK-VSX-NEXT:    beqlr 0
+; CHECK-VSX-NEXT:  # %bb.1: # %entry
+; CHECK-VSX-NEXT:    xsnegdp 2, 2
+; CHECK-VSX-NEXT:    blr
 entry:
   %conv = fpext double %a to ppc_fp128
   %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %b) #0
   ret ppc_fp128 %call
 
-; CHECK-LABEL: @foo_ll
-; CHECK: bl copysignl
-; CHECK: blr
-; CHECK-VSX-LABEL: @foo_ll
-; CHECK-VSX: bl copysignl
-; CHECK-VSX: blr
 }
 
 define ppc_fp128 @foo_ld(double %a, double %b) #0 {
+; CHECK-LABEL: foo_ld:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fcpsgn 0, 2, 1
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    li 4, 8
+; CHECK-NEXT:    fcmpu 0, 1, 0
+; CHECK-NEXT:    fmr 1, 0
+; CHECK-NEXT:    iseleq 3, 4, 3
+; CHECK-NEXT:    addis 4, 2, .LCPI3_0@toc@ha
+; CHECK-NEXT:    addi 4, 4, .LCPI3_0@toc@l
+; CHECK-NEXT:    lfdx 2, 4, 3
+; CHECK-NEXT:    blr
+;
+; CHECK-VSX-LABEL: foo_ld:
+; CHECK-VSX:       # %bb.0: # %entry
+; CHECK-VSX-NEXT:    fmr 0, 1
+; CHECK-VSX-NEXT:    xscpsgndp 1, 2, 1
+; CHECK-VSX-NEXT:    xxlxor 2, 2, 2
+; CHECK-VSX-NEXT:    xscmpudp 0, 0, 1
+; CHECK-VSX-NEXT:    beqlr 0
+; CHECK-VSX-NEXT:  # %bb.1: # %entry
+; CHECK-VSX-NEXT:    xsnegdp 2, 2
+; CHECK-VSX-NEXT:    blr
 entry:
   %conv = fpext double %a to ppc_fp128
   %conv1 = fpext double %b to ppc_fp128
   %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %conv1) #0
   ret ppc_fp128 %call
 
-; CHECK-LABEL: @foo_ld
-; CHECK: bl copysignl
-; CHECK: blr
-; CHECK-VSX-LABEL: @foo_ld
-; CHECK-VSX: bl copysignl
-; CHECK-VSX: blr
 }
 
 define ppc_fp128 @foo_lf(double %a, float %b) #0 {
+; CHECK-LABEL: foo_lf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fcpsgn 0, 2, 1
+; CHECK-NEXT:    li 3, 0
+; CHECK-NEXT:    li 4, 8
+; CHECK-NEXT:    fcmpu 0, 1, 0
+; CHECK-NEXT:    fmr 1, 0
+; CHECK-NEXT:    iseleq 3, 4, 3
+; CHECK-NEXT:    addis 4, 2, .LCPI4_0@toc@ha
+; CHECK-NEXT:    addi 4, 4, .LCPI4_0@toc@l
+; CHECK-NEXT:    lfdx 2, 4, 3
+; CHECK-NEXT:    blr
+;
+; CHECK-VSX-LABEL: foo_lf:
+; CHECK-VSX:       # %bb.0: # %entry
+; CHECK-VSX-NEXT:    fmr 0, 1
+; CHECK-VSX-NEXT:    fcpsgn 1, 2, 1
+; CHECK-VSX-NEXT:    xxlxor 2, 2, 2
+; CHECK-VSX-NEXT:    xscmpudp 0, 0, 1
+; CHECK-VSX-NEXT:    beqlr 0
+; CHECK-VSX-NEXT:  # %bb.1: # %entry
+; CHECK-VSX-NEXT:    xsnegdp 2, 2
+; CHECK-VSX-NEXT:    blr
 entry:
   %conv = fpext double %a to ppc_fp128
   %conv1 = fpext float %b to ppc_fp128
   %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %conv1) #0
   ret ppc_fp128 %call
 
-; CHECK-LABEL: @foo_lf
-; CHECK: bl copysignl
-; CHECK: blr
-; CHECK-VSX-LABEL: @foo_lf
-; CHECK-VSX: bl copysignl
-; CHECK-VSX: blr
 }
 
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
deleted file mode 100644
index a114438a87476e..00000000000000
--- a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -verify-machineinstrs < %s -mcpu=ppc | FileCheck %s
-
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
-target triple = "powerpc-unknown-linux-gnu"
-
-define ppc_fp128 @foo(ptr nocapture %n, ppc_fp128 %d) nounwind readonly {
-entry:
-  br label %for.body
-
-for.body:                                         ; preds = %for.body, %entry
-  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %x.05 = phi ppc_fp128 [ %d, %entry ], [ %conv, %for.body ]
-  %arrayidx = getelementptr inbounds ppc_fp128, ptr %n, i32 %i.06
-  %0 = load ppc_fp128, ptr %arrayidx, align 8
-  %conv = tail call ppc_fp128 @copysignl(ppc_fp128 %x.05, ppc_fp128 %d) nounwind readonly
-  %inc = add nsw i32 %i.06, 1
-  %exitcond = icmp eq i32 %inc, 2048
-  br i1 %exitcond, label %for.end, label %for.body
-
-for.end:                                          ; preds = %for.body
-  ret ppc_fp128 %conv
-}
-
-declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) #0
-
-; CHECK: @foo
-; CHECK-NOT: mtctr
-
diff --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
index ebec8c1c4d6543..967e6cf679d4c8 100644
--- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
+++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
@@ -89,100 +89,59 @@ entry:
 }
 
 define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind  {
-; PPC64-P8-LE-LABEL: test_copysign:
-; PPC64-P8-LE:       # %bb.0: # %entry
-; PPC64-P8-LE-NEXT:    mflr 0
-; PPC64-P8-LE-NEXT:    stdu 1, -32(1)
-; PPC64-P8-LE-NEXT:    std 0, 48(1)
-; PPC64-P8-LE-NEXT:    bl copysignl
-; PPC64-P8-LE-NEXT:    nop
-; PPC64-P8-LE-NEXT:    mffprd 3, 1
-; PPC64-P8-LE-NEXT:    mffprd 4, 2
-; PPC64-P8-LE-NEXT:    addi 1, 1, 32
-; PPC64-P8-LE-NEXT:    ld 0, 16(1)
-; PPC64-P8-LE-NEXT:    mtlr 0
-; PPC64-P8-LE-NEXT:    blr
-;
-; PPC64-LE-LABEL: test_copysign:
-; PPC64-LE:       # %bb.0: # %entry
-; PPC64-LE-NEXT:    mflr 0
-; PPC64-LE-NEXT:    stdu 1, -48(1)
-; PPC64-LE-NEXT:    std 0, 64(1)
-; PPC64-LE-NEXT:    bl copysignl
-; PPC64-LE-NEXT:    nop
-; PPC64-LE-NEXT:    stfd 1, 32(1)
-; PPC64-LE-NEXT:    stfd 2, 40(1)
-; PPC64-LE-NEXT:    ld 3, 32(1)
-; PPC64-LE-NEXT:    ld 4, 40(1)
-; PPC64-LE-NEXT:    addi 1, 1, 48
-; PPC64-LE-NEXT:    ld 0, 16(1)
-; PPC64-LE-NEXT:    mtlr 0
-; PPC64-LE-NEXT:    blr
-;
-; PPC64-P8-BE-LABEL: test_copysign:
-; PPC64-P8-BE:       # %bb.0: # %entry
-; PPC64-P8-BE-NEXT:    mflr 0
-; PPC64-P8-BE-NEXT:    stdu 1, -112(1)
-; PPC64-P8-BE-NEXT:    std 0, 128(1)
-; PPC64-P8-BE-NEXT:    bl copysignl
-; PPC64-P8-BE-NEXT:    nop
-; PPC64-P8-BE-NEXT:    mffprd 3, 1
-; PPC64-P8-BE-NEXT:    mffprd 4, 2
-; PPC64-P8-BE-NEXT:    addi 1, 1, 112
-; PPC64-P8-BE-NEXT:    ld 0, 16(1)
-; PPC64-P8-BE-NEXT:    mtlr 0
-; PPC64-P8-BE-NEXT:    blr
+; PPC64-P8-LABEL: test_copysign:
+; PPC64-P8:       # %bb.0: # %entry
+; PPC64-P8-NEXT:    xscpsgndp 0, 3, 1
+; PPC64-P8-NEXT:    xscmpudp 0, 1, 0
+; PPC64-P8-NEXT:    beq 0, .LBB2_2
+; PPC64-P8-NEXT:  # %bb.1: # %entry
+; PPC64-P8-NEXT:    xsnegdp 2, 2
+; PPC64-P8-NEXT:  .LBB2_2: # %entry
+; PPC64-P8-NEXT:    mffprd 3, 0
+; PPC64-P8-NEXT:    mffprd 4, 2
+; PPC64-P8-NEXT:    blr
 ;
-; PPC64-BE-LABEL: test_copysign:
-; PPC64-BE:       # %bb.0: # %entry
-; PPC64-BE-NEXT:    mflr 0
-; PPC64-BE-NEXT:    stdu 1, -128(1)
-; PPC64-BE-NEXT:    std 0, 144(1)
-; PPC64-BE-NEXT:    bl copysignl
-; PPC64-BE-NEXT:    nop
-; PPC64-BE-NEXT:    stfd 1, 112(1)
-; PPC64-BE-NEXT:    stfd 2, 120(1)
-; PPC64-BE-NEXT:    ld 3, 112(1)
-; PPC64-BE-NEXT:    ld 4, 120(1)
-; PPC64-BE-NEXT:    addi 1, 1, 128
-; PPC64-BE-NEXT:    ld 0, 16(1)
-; PPC64-BE-NEXT:    mtlr 0
-; PPC64-BE-NEXT:    blr
+; PPC64-LABEL: test_copysign:
+; PPC64:       # %bb.0: # %entry
+; PPC64-NEXT:    xscpsgndp 0, 3, 1
+; PPC64-NEXT:    xscmpudp 0, 1, 0
+; PPC64-NEXT:    beq 0, .LBB2_2
+; PPC64-NEXT:  # %bb.1: # %entry
+; PPC64-NEXT:    xsnegdp 2, 2
+; PPC64-NEXT:  .LBB2_2: # %entry
+; PPC64-NEXT:    stfd 0, -16(1)
+; PPC64-NEXT:    stfd 2, -8(1)
+; PPC64-NEXT:    ld 3, -16(1)
+; PPC64-NEXT:    ld 4, -8(1)
+; PPC64-NEXT:    blr
 ;
 ; PPC32-LABEL: test_copysign:
 ; PPC32:       # %bb.0: # %entry
-; PPC32-NEXT:    mflr 0
-; PPC32-NEXT:    stwu 1, -80(1)
-; PPC32-NEXT:    stw 0, 84(1)
-; PPC32-NEXT:    stfd 1, 32(1)
-; PPC32-NEXT:    lwz 3, 36(1)
-; PPC32-NEXT:    stfd 2, 24(1)
-; PPC32-NEXT:    stw 3, 52(1)
-; PPC32-NEXT:    lwz 3, 32(1)
-; PPC32-NEXT:    stfd 3, 56(1)
-; PPC32-NEXT:    stw 3, 48(1)
-; PPC32-NEXT:    lwz 3, 28(1)
-; PPC32-NEXT:    lfd 4, 64(1)
-; PPC32-NEXT:    stw 3, 44(1)
-; PPC32-NEXT:    lwz 3, 24(1)
-; PPC32-NEXT:    lfd 1, 48(1)
-; PPC32-NEXT:    stw 3, 40(1)
-; PPC32-NEXT:    lwz 3, 60(1)
-; PPC32-NEXT:    lfd 2, 40(1)
-; PPC32-NEXT:    stw 3, 76(1)
-; PPC32-NEXT:    lwz 3, 56(1)
-; PPC32-NEXT:    stw 3, 72(1)
-; PPC32-NEXT:    lfd 3, 72(1)
-; PPC32-NEXT:    bl copysignl
-; PPC32-NEXT:    stfd 1, 8(1)
+; PPC32-NEXT:    stwu 1, -32(1)
+; PPC32-NEXT:    stfd 3, 8(1)
+; PPC32-NEXT:    lbz 3, 8(1)
+; PPC32-NEXT:    srwi 3, 3, 7
+; PPC32-NEXT:    andi. 3, 3, 1
+; PPC32-NEXT:    bc 12, 1, .LBB2_2
+; PPC32-NEXT:  # %bb.1: # %entry
+; PPC32-NEXT:    fabs 0, 1
+; PPC32-NEXT:    fcmpu 0, 1, 0
+; PPC32-NEXT:    bne 0, .LBB2_3
+; PPC32-NEXT:    b .LBB2_4
+; PPC32-NEXT:  .LBB2_2:
+; PPC32-NEXT:    fnabs 0, 1
+; PPC32-NEXT:    fcmpu 0, 1, 0
+; PPC32-NEXT:    beq 0, .LBB2_4
+; PPC32-NEXT:  .LBB2_3: # %entry
+; PPC32-NEXT:    fneg 2, 2
+; PPC32-NEXT:  .LBB2_4: # %entry
+; PPC32-NEXT:    stfd 0, 24(1)
 ; PPC32-NEXT:    stfd 2, 16(1)
-; PPC32-NEXT:    lwz 3, 8(1)
-; PPC32-NEXT:    lwz 4, 12(1)
+; PPC32-NEXT:    lwz 3, 24(1)
+; PPC32-NEXT:    lwz 4, 28(1)
 ; PPC32-NEXT:    lwz 5, 16(1)
 ; PPC32-NEXT:    lwz 6, 20(1)
-; PPC32-NEXT:    lwz 0, 84(1)
-; PPC32-NEXT:    addi 1, 1, 80
-; PPC32-NEXT:    mtlr 0
+; PPC32-NEXT:    addi 1, 1, 32
 ; PPC32-NEXT:    blr
 entry:
 	%0 = tail call ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128 %x, ppc_fp128 %y)
@@ -236,3 +195,8 @@ entry:
 
 declare ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128)
 declare ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128, ppc_fp128)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; PPC64-BE: {{.*}}
+; PPC64-LE: {{.*}}
+; PPC64-P8-BE: {{.*}}
+; PPC64-P8-LE: {{.*}}

>From 32d117c6af67d54c613b456004933cb9551cac17 Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee at gmail.com>
Date: Tue, 29 Oct 2024 02:57:15 -0700
Subject: [PATCH 7/7] try SETUEQ

---
 .../SelectionDAG/LegalizeFloatTypes.cpp       |  2 +-
 llvm/test/CodeGen/PowerPC/copysignl.ll        | 45 ++++++++++---------
 .../PowerPC/fp128-bitcast-after-operation.ll  | 24 +++++-----
 3 files changed, 38 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index d0c4764732a4ea..0a66f3f6628f37 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1661,7 +1661,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
   // a double-double is Hi + Lo, so if Hi flips sign, so must Lo
   Lo = DAG.getSelectCC(DL, Tmp, Hi, Lo,
                        DAG.getNode(ISD::FNEG, DL, Lo.getValueType(), Lo),
-                       ISD::SETEQ);
+                       ISD::SETUEQ);
 }
 
 void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
diff --git a/llvm/test/CodeGen/PowerPC/copysignl.ll b/llvm/test/CodeGen/PowerPC/copysignl.ll
index 2a52825158c5ee..580e88ba596e02 100644
--- a/llvm/test/CodeGen/PowerPC/copysignl.ll
+++ b/llvm/test/CodeGen/PowerPC/copysignl.ll
@@ -45,13 +45,13 @@ define ppc_fp128 @foo_ll(double %a, ppc_fp128 %b) #0 {
 ; CHECK-LABEL: foo_ll:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fcpsgn 0, 2, 1
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    li 4, 8
-; CHECK-NEXT:    fcmpu 0, 1, 0
-; CHECK-NEXT:    fmr 1, 0
-; CHECK-NEXT:    iseleq 3, 4, 3
+; CHECK-NEXT:    li 3, 8
 ; CHECK-NEXT:    addis 4, 2, .LCPI2_0@toc@ha
 ; CHECK-NEXT:    addi 4, 4, .LCPI2_0@toc@l
+; CHECK-NEXT:    fcmpu 0, 1, 0
+; CHECK-NEXT:    fmr 1, 0
+; CHECK-NEXT:    crnor 20, 2, 3
+; CHECK-NEXT:    isel 3, 0, 3, 20
 ; CHECK-NEXT:    lfdx 2, 4, 3
 ; CHECK-NEXT:    blr
 ;
@@ -60,8 +60,9 @@ define ppc_fp128 @foo_ll(double %a, ppc_fp128 %b) #0 {
 ; CHECK-VSX-NEXT:    fmr 0, 1
 ; CHECK-VSX-NEXT:    xscpsgndp 1, 2, 1
 ; CHECK-VSX-NEXT:    xxlxor 2, 2, 2
-; CHECK-VSX-NEXT:    xscmpudp 0, 0, 1
-; CHECK-VSX-NEXT:    beqlr 0
+; CHECK-VSX-NEXT:    fcmpu 0, 0, 1
+; CHECK-VSX-NEXT:    cror 20, 2, 3
+; CHECK-VSX-NEXT:    bclr 12, 20, 0
 ; CHECK-VSX-NEXT:  # %bb.1: # %entry
 ; CHECK-VSX-NEXT:    xsnegdp 2, 2
 ; CHECK-VSX-NEXT:    blr
@@ -76,13 +77,13 @@ define ppc_fp128 @foo_ld(double %a, double %b) #0 {
 ; CHECK-LABEL: foo_ld:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fcpsgn 0, 2, 1
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    li 4, 8
-; CHECK-NEXT:    fcmpu 0, 1, 0
-; CHECK-NEXT:    fmr 1, 0
-; CHECK-NEXT:    iseleq 3, 4, 3
+; CHECK-NEXT:    li 3, 8
 ; CHECK-NEXT:    addis 4, 2, .LCPI3_0@toc@ha
 ; CHECK-NEXT:    addi 4, 4, .LCPI3_0@toc@l
+; CHECK-NEXT:    fcmpu 0, 1, 0
+; CHECK-NEXT:    fmr 1, 0
+; CHECK-NEXT:    crnor 20, 2, 3
+; CHECK-NEXT:    isel 3, 0, 3, 20
 ; CHECK-NEXT:    lfdx 2, 4, 3
 ; CHECK-NEXT:    blr
 ;
@@ -91,8 +92,9 @@ define ppc_fp128 @foo_ld(double %a, double %b) #0 {
 ; CHECK-VSX-NEXT:    fmr 0, 1
 ; CHECK-VSX-NEXT:    xscpsgndp 1, 2, 1
 ; CHECK-VSX-NEXT:    xxlxor 2, 2, 2
-; CHECK-VSX-NEXT:    xscmpudp 0, 0, 1
-; CHECK-VSX-NEXT:    beqlr 0
+; CHECK-VSX-NEXT:    fcmpu 0, 0, 1
+; CHECK-VSX-NEXT:    cror 20, 2, 3
+; CHECK-VSX-NEXT:    bclr 12, 20, 0
 ; CHECK-VSX-NEXT:  # %bb.1: # %entry
 ; CHECK-VSX-NEXT:    xsnegdp 2, 2
 ; CHECK-VSX-NEXT:    blr
@@ -108,13 +110,13 @@ define ppc_fp128 @foo_lf(double %a, float %b) #0 {
 ; CHECK-LABEL: foo_lf:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fcpsgn 0, 2, 1
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    li 4, 8
-; CHECK-NEXT:    fcmpu 0, 1, 0
-; CHECK-NEXT:    fmr 1, 0
-; CHECK-NEXT:    iseleq 3, 4, 3
+; CHECK-NEXT:    li 3, 8
 ; CHECK-NEXT:    addis 4, 2, .LCPI4_0@toc@ha
 ; CHECK-NEXT:    addi 4, 4, .LCPI4_0@toc@l
+; CHECK-NEXT:    fcmpu 0, 1, 0
+; CHECK-NEXT:    fmr 1, 0
+; CHECK-NEXT:    crnor 20, 2, 3
+; CHECK-NEXT:    isel 3, 0, 3, 20
 ; CHECK-NEXT:    lfdx 2, 4, 3
 ; CHECK-NEXT:    blr
 ;
@@ -123,8 +125,9 @@ define ppc_fp128 @foo_lf(double %a, float %b) #0 {
 ; CHECK-VSX-NEXT:    fmr 0, 1
 ; CHECK-VSX-NEXT:    fcpsgn 1, 2, 1
 ; CHECK-VSX-NEXT:    xxlxor 2, 2, 2
-; CHECK-VSX-NEXT:    xscmpudp 0, 0, 1
-; CHECK-VSX-NEXT:    beqlr 0
+; CHECK-VSX-NEXT:    fcmpu 0, 0, 1
+; CHECK-VSX-NEXT:    cror 20, 2, 3
+; CHECK-VSX-NEXT:    bclr 12, 20, 0
 ; CHECK-VSX-NEXT:  # %bb.1: # %entry
 ; CHECK-VSX-NEXT:    xsnegdp 2, 2
 ; CHECK-VSX-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
index 967e6cf679d4c8..c51b98de5cdb04 100644
--- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
+++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
@@ -92,20 +92,22 @@ define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind  {
 ; PPC64-P8-LABEL: test_copysign:
 ; PPC64-P8:       # %bb.0: # %entry
 ; PPC64-P8-NEXT:    xscpsgndp 0, 3, 1
-; PPC64-P8-NEXT:    xscmpudp 0, 1, 0
-; PPC64-P8-NEXT:    beq 0, .LBB2_2
+; PPC64-P8-NEXT:    fcmpu 0, 1, 0
+; PPC64-P8-NEXT:    cror 20, 2, 3
+; PPC64-P8-NEXT:    bc 12, 20, .LBB2_2
 ; PPC64-P8-NEXT:  # %bb.1: # %entry
 ; PPC64-P8-NEXT:    xsnegdp 2, 2
 ; PPC64-P8-NEXT:  .LBB2_2: # %entry
-; PPC64-P8-NEXT:    mffprd 3, 0
 ; PPC64-P8-NEXT:    mffprd 4, 2
+; PPC64-P8-NEXT:    mffprd 3, 0
 ; PPC64-P8-NEXT:    blr
 ;
 ; PPC64-LABEL: test_copysign:
 ; PPC64:       # %bb.0: # %entry
 ; PPC64-NEXT:    xscpsgndp 0, 3, 1
-; PPC64-NEXT:    xscmpudp 0, 1, 0
-; PPC64-NEXT:    beq 0, .LBB2_2
+; PPC64-NEXT:    fcmpu 0, 1, 0
+; PPC64-NEXT:    cror 20, 2, 3
+; PPC64-NEXT:    bc 12, 20, .LBB2_2
 ; PPC64-NEXT:  # %bb.1: # %entry
 ; PPC64-NEXT:    xsnegdp 2, 2
 ; PPC64-NEXT:  .LBB2_2: # %entry
@@ -125,16 +127,16 @@ define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind  {
 ; PPC32-NEXT:    bc 12, 1, .LBB2_2
 ; PPC32-NEXT:  # %bb.1: # %entry
 ; PPC32-NEXT:    fabs 0, 1
-; PPC32-NEXT:    fcmpu 0, 1, 0
-; PPC32-NEXT:    bne 0, .LBB2_3
-; PPC32-NEXT:    b .LBB2_4
+; PPC32-NEXT:    b .LBB2_3
 ; PPC32-NEXT:  .LBB2_2:
 ; PPC32-NEXT:    fnabs 0, 1
-; PPC32-NEXT:    fcmpu 0, 1, 0
-; PPC32-NEXT:    beq 0, .LBB2_4
 ; PPC32-NEXT:  .LBB2_3: # %entry
+; PPC32-NEXT:    fcmpu 0, 1, 0
+; PPC32-NEXT:    cror 20, 2, 3
+; PPC32-NEXT:    bc 12, 20, .LBB2_5
+; PPC32-NEXT:  # %bb.4: # %entry
 ; PPC32-NEXT:    fneg 2, 2
-; PPC32-NEXT:  .LBB2_4: # %entry
+; PPC32-NEXT:  .LBB2_5: # %entry
 ; PPC32-NEXT:    stfd 0, 24(1)
 ; PPC32-NEXT:    stfd 2, 16(1)
 ; PPC32-NEXT:    lwz 3, 24(1)