[clang] b389354 - [Clang][PowerPC] Add max/min intrinsics to Clang and PPC backend

Tue Apr 5 19:48:16 PDT 2022

Author: Ting Wang
Date: 2022-04-05T22:43:48-04:00
New Revision: b389354b285744f700fd9372c8707fa056d7cb37

URL: https://github.com/llvm/llvm-project/commit/b389354b285744f700fd9372c8707fa056d7cb37
DIFF: https://github.com/llvm/llvm-project/commit/b389354b285744f700fd9372c8707fa056d7cb37.diff

LOG: [Clang][PowerPC] Add max/min intrinsics to Clang and PPC backend

Add support for builtin_[max|min], which have the following prototype:
A builtin_max (A1, A2, A3, ...)
All arguments must have the same type; they must all be float, double, or long double.
Internally, SelectCC is used to compute the result.

Reviewed By: qiucf

Differential Revision: https://reviews.llvm.org/D122478

Added: 
    llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-maxmin.ll

Modified: 
    clang/include/clang/Basic/BuiltinsPPC.def
    clang/lib/Basic/Targets/PPC.cpp
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Sema/SemaChecking.cpp
    clang/test/CodeGen/PowerPC/builtins-ppc.c
    clang/test/Sema/builtins-ppc.c
    llvm/include/llvm/IR/IntrinsicsPowerPC.td
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index 70b0184f199f8..8a4c5b4eead27 100644

--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -152,6 +152,13 @@ BUILTIN(__builtin_ppc_mtmsr, "vUi", "")
 BUILTIN(__builtin_ppc_mtspr, "vIiULi", "")
 BUILTIN(__builtin_ppc_stfiw, "viC*d", "")
 BUILTIN(__builtin_ppc_addex, "LLiLLiLLiCIi", "")
+// select
+BUILTIN(__builtin_ppc_maxfe, "LdLdLdLd.", "t")
+BUILTIN(__builtin_ppc_maxfl, "dddd.", "t")
+BUILTIN(__builtin_ppc_maxfs, "ffff.", "t")
+BUILTIN(__builtin_ppc_minfe, "LdLdLdLd.", "t")
+BUILTIN(__builtin_ppc_minfl, "dddd.", "t")
+BUILTIN(__builtin_ppc_minfs, "ffff.", "t")
 
 BUILTIN(__builtin_ppc_get_timebase, "ULLi", "n")
 

diff  --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 1eb0317af60b6..bafcc23b38334 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -247,6 +247,12 @@ static void defineXLCompatMacros(MacroBuilder &Builder) {
   Builder.defineMacro("__test_data_class", "__builtin_ppc_test_data_class");
   Builder.defineMacro("__swdiv", "__builtin_ppc_swdiv");
   Builder.defineMacro("__swdivs", "__builtin_ppc_swdivs");
+  Builder.defineMacro("__builtin_maxfe", "__builtin_ppc_maxfe");
+  Builder.defineMacro("__builtin_maxfl", "__builtin_ppc_maxfl");
+  Builder.defineMacro("__builtin_maxfs", "__builtin_ppc_maxfs");
+  Builder.defineMacro("__builtin_minfe", "__builtin_ppc_minfe");
+  Builder.defineMacro("__builtin_minfl", "__builtin_ppc_minfl");
+  Builder.defineMacro("__builtin_minfs", "__builtin_ppc_minfs");
 }
 
 /// PPCTargetInfo::getTargetDefines - Return a set of the PowerPC-specific

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 8ca4b2d0bf15d..661c0a105f427 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -16302,6 +16302,18 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
     return Builder.CreateCall(CGM.getIntrinsic(IntrinsicID), Ops,
                               "test_data_class");
   }
+  case PPC::BI__builtin_ppc_maxfe:
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe), Ops);
+  case PPC::BI__builtin_ppc_maxfl:
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl), Ops);
+  case PPC::BI__builtin_ppc_maxfs:
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs), Ops);
+  case PPC::BI__builtin_ppc_minfe:
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe), Ops);
+  case PPC::BI__builtin_ppc_minfl:
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl), Ops);
+  case PPC::BI__builtin_ppc_minfs:
+    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs), Ops);
   case PPC::BI__builtin_ppc_swdiv:
   case PPC::BI__builtin_ppc_swdivs:
     return Builder.CreateFDiv(Ops[0], Ops[1], "swdiv");

diff  --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 7e73988c33b74..9331d169f800f 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3904,6 +3904,33 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
                             diag::err_ppc_builtin_requires_vsx) ||
            SemaBuiltinConstantArgRange(TheCall, 1, 0, 127);
   }
+  case PPC::BI__builtin_ppc_maxfe:
+  case PPC::BI__builtin_ppc_minfe:
+  case PPC::BI__builtin_ppc_maxfl:
+  case PPC::BI__builtin_ppc_minfl:
+  case PPC::BI__builtin_ppc_maxfs:
+  case PPC::BI__builtin_ppc_minfs: {
+    if (Context.getTargetInfo().getTriple().isOSAIX() &&
+        (BuiltinID == PPC::BI__builtin_ppc_maxfe ||
+         BuiltinID == PPC::BI__builtin_ppc_minfe))
+      return Diag(TheCall->getBeginLoc(), diag::err_target_unsupported_type)
+             << "builtin" << true << 128 << QualType(Context.LongDoubleTy)
+             << false << Context.getTargetInfo().getTriple().str();
+    // Argument type should be exact.
+    QualType ArgType = QualType(Context.LongDoubleTy);
+    if (BuiltinID == PPC::BI__builtin_ppc_maxfl ||
+        BuiltinID == PPC::BI__builtin_ppc_minfl)
+      ArgType = QualType(Context.DoubleTy);
+    else if (BuiltinID == PPC::BI__builtin_ppc_maxfs ||
+             BuiltinID == PPC::BI__builtin_ppc_minfs)
+      ArgType = QualType(Context.FloatTy);
+    for (unsigned I = 0, E = TheCall->getNumArgs(); I < E; ++I)
+      if (TheCall->getArg(I)->getType() != ArgType)
+        return Diag(TheCall->getBeginLoc(),
+                    diag::err_typecheck_convert_incompatible)
+               << TheCall->getArg(I)->getType() << ArgType << 1 << 0 << 0;
+    return false;
+  }
   case PPC::BI__builtin_ppc_load8r:
   case PPC::BI__builtin_ppc_store8r:
     return SemaFeatureCheck(*this, TheCall, "isa-v206-instructions",

diff  --git a/clang/test/CodeGen/PowerPC/builtins-ppc.c b/clang/test/CodeGen/PowerPC/builtins-ppc.c
index cbd53346d4b0f..adfbf27b4c8d4 100644
--- a/clang/test/CodeGen/PowerPC/builtins-ppc.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc.c
@@ -46,3 +46,31 @@ long double test_builtin_pack_ldbl(double x, double y) {
   // CHECK: call ppc_fp128 @llvm.ppc.pack.longdouble(double %0, double %1)
   return __builtin_pack_longdouble(x, y);
 }
+
+void test_builtin_ppc_maxminfe(long double a, long double b, long double c,
+                               long double d) {
+  volatile long double res;
+  // CHECK: call ppc_fp128 (ppc_fp128, ppc_fp128, ppc_fp128, ...) @llvm.ppc.maxfe(ppc_fp128 %0, ppc_fp128 %1, ppc_fp128 %2, ppc_fp128 %3)
+  res = __builtin_ppc_maxfe(a, b, c, d);
+
+  // CHECK: call ppc_fp128 (ppc_fp128, ppc_fp128, ppc_fp128, ...) @llvm.ppc.minfe(ppc_fp128 %5, ppc_fp128 %6, ppc_fp128 %7, ppc_fp128 %8)
+  res = __builtin_ppc_minfe(a, b, c, d);
+}
+
+void test_builtin_ppc_maxminfl(double a, double b, double c, double d) {
+  volatile double res;
+  // CHECK: call double (double, double, double, ...) @llvm.ppc.maxfl(double %0, double %1, double %2, double %3)
+  res = __builtin_ppc_maxfl(a, b, c, d);
+
+  // CHECK: call double (double, double, double, ...) @llvm.ppc.minfl(double %5, double %6, double %7, double %8)
+  res = __builtin_ppc_minfl(a, b, c, d);
+}
+
+void test_builtin_ppc_maxminfs(float a, float b, float c, float d) {
+  volatile float res;
+  // CHECK: call float (float, float, float, ...) @llvm.ppc.maxfs(float %0, float %1, float %2, float %3)
+  res = __builtin_ppc_maxfs(a, b, c, d);
+
+  // CHECK: call float (float, float, float, ...) @llvm.ppc.minfs(float %5, float %6, float %7, float %8)
+  res = __builtin_ppc_minfs(a, b, c, d);
+}

diff  --git a/clang/test/Sema/builtins-ppc.c b/clang/test/Sema/builtins-ppc.c
index a0550c72a7785..0f59b990331cb 100644
--- a/clang/test/Sema/builtins-ppc.c
+++ b/clang/test/Sema/builtins-ppc.c
@@ -10,6 +10,12 @@
 // RUN: -triple powerpc64le-unknown-unknown -DTEST_CRYPTO -fsyntax-only \
 // RUN: -target-feature +vsx -verify %s
 
+// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -DTEST_MAXMIN -fsyntax-only \
+// RUN: -verify %s
+
+// RUN: %clang_cc1 -triple powerpc64-ibm-aix-xcoff -DTEST_MAXMIN -fsyntax-only \
+// RUN: -verify %s
+
 #ifdef TEST_HTM
 void test_htm() {
   __builtin_tbegin(4); // expected-error-re {{argument value {{.*}} is outside the valid range}}
@@ -54,3 +60,25 @@ vector unsigned long long test_vshasigmad_or(void)
 
 #endif
 
+#ifdef TEST_MAXMIN
+void test_maxmin() {
+  long double fe;
+  double fl;
+  float fs;
+#ifdef _AIX
+  __builtin_ppc_maxfe(fe, fe, fe, fe); // expected-error-re {{builtin requires 128 bit size 'long double' type support, but target {{.*}} does not support it}}
+  __builtin_ppc_minfe(fe, fe, fe, fe); // expected-error-re {{builtin requires 128 bit size 'long double' type support, but target {{.*}} does not support it}}
+  __builtin_ppc_maxfl(fs, fs, fs, fs); // expected-error-re {{passing {{.*}} to parameter of incompatible type {{.*}}}}
+  __builtin_ppc_minfl(fs, fs, fs, fs); // expected-error-re {{passing {{.*}} to parameter of incompatible type {{.*}}}}
+  __builtin_ppc_maxfs(fe, fe, fe, fe); // expected-error-re {{passing {{.*}} to parameter of incompatible type {{.*}}}}
+  __builtin_ppc_minfs(fe, fe, fe, fe); // expected-error-re {{passing {{.*}} to parameter of incompatible type {{.*}}}}
+#else
+  __builtin_ppc_maxfe(fl, fl, fl, fl); // expected-error-re {{passing {{.*}} to parameter of incompatible type {{.*}}}}
+  __builtin_ppc_minfe(fl, fl, fl, fl); // expected-error-re {{passing {{.*}} to parameter of incompatible type {{.*}}}}
+  __builtin_ppc_maxfl(fs, fs, fs, fs); // expected-error-re {{passing {{.*}} to parameter of incompatible type {{.*}}}}
+  __builtin_ppc_minfl(fs, fs, fs, fs); // expected-error-re {{passing {{.*}} to parameter of incompatible type {{.*}}}}
+  __builtin_ppc_maxfs(fe, fe, fe, fe); // expected-error-re {{passing {{.*}} to parameter of incompatible type {{.*}}}}
+  __builtin_ppc_minfs(fe, fe, fe, fe); // expected-error-re {{passing {{.*}} to parameter of incompatible type {{.*}}}}
+#endif
+}
+#endif

diff  --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 44d2d52705d02..aa050ec6d5883 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -160,6 +160,37 @@ let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
   def int_ppc_fctuwz
       : GCCBuiltin<"__builtin_ppc_fctuwz">,
         Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+
+  // XL compatible select functions
+  // TODO: Add llvm_f128_ty support.
+  def int_ppc_maxfe
+      : Intrinsic<
+            [llvm_ppcf128_ty],
+            [llvm_ppcf128_ty, llvm_ppcf128_ty, llvm_ppcf128_ty, llvm_vararg_ty],
+            [IntrNoMem]>;
+  def int_ppc_maxfl
+      : Intrinsic<
+            [llvm_double_ty],
+            [llvm_double_ty, llvm_double_ty, llvm_double_ty, llvm_vararg_ty],
+            [IntrNoMem]>;
+  def int_ppc_maxfs
+      : Intrinsic<[llvm_float_ty],
+                  [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_vararg_ty],
+                  [IntrNoMem]>;
+  def int_ppc_minfe
+      : Intrinsic<
+            [llvm_ppcf128_ty],
+            [llvm_ppcf128_ty, llvm_ppcf128_ty, llvm_ppcf128_ty, llvm_vararg_ty],
+            [IntrNoMem]>;
+  def int_ppc_minfl
+      : Intrinsic<
+            [llvm_double_ty],
+            [llvm_double_ty, llvm_double_ty, llvm_double_ty, llvm_vararg_ty],
+            [IntrNoMem]>;
+  def int_ppc_minfs
+      : Intrinsic<[llvm_float_ty],
+                  [llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_vararg_ty],
+                  [IntrNoMem]>;
 }
 
 let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c99c68d7b018a..b126ed486b0d6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10563,6 +10563,30 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                     dl, SDValue());
     return Result.first;
   }
+  case Intrinsic::ppc_maxfe:
+  case Intrinsic::ppc_maxfl:
+  case Intrinsic::ppc_maxfs:
+  case Intrinsic::ppc_minfe:
+  case Intrinsic::ppc_minfl:
+  case Intrinsic::ppc_minfs: {
+    EVT VT = Op.getValueType();
+    assert(
+        all_of(Op->ops().drop_front(4),
+               [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
+        "ppc_[max|min]f[e|l|s] must have uniform type arguments");
+    ISD::CondCode CC = ISD::SETGT;
+    if (IntrinsicID == Intrinsic::ppc_minfe ||
+        IntrinsicID == Intrinsic::ppc_minfl ||
+        IntrinsicID == Intrinsic::ppc_minfs)
+      CC = ISD::SETLT;
+    unsigned I = Op.getNumOperands() - 2, Cnt = I;
+    SDValue Res = Op.getOperand(I);
+    for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
+      Res =
+          DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
+    }
+    return Res;
+  }
   }
 
   // If this is a lowered altivec predicate compare, CompareOpc is set to the
@@ -11223,6 +11247,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
       Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                     N->getOperand(2), N->getOperand(1)));
       break;
+    case Intrinsic::ppc_maxfe:
+    case Intrinsic::ppc_minfe:
     case Intrinsic::ppc_fnmsub:
     case Intrinsic::ppc_convert_f128_to_ppcf128:
       Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));

diff  --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-maxmin.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-maxmin.ll
new file mode 100644
index 0000000000000..fc4e1729036b8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-maxmin.ll
@@ -0,0 +1,257 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux \
+; RUN:    < %s | FileCheck --check-prefixes=CHECK,CHECK-P9 %s
+; RUN: llc -mcpu=pwr8 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux \
+; RUN:    < %s | FileCheck --check-prefixes=CHECK,CHECK-P8 %s
+
+declare ppc_fp128 @llvm.ppc.maxfe(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ...)
+define ppc_fp128 @test_maxfe(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) {
+; CHECK-LABEL: test_maxfe:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fcmpu 0, 6, 4
+; CHECK-NEXT:    fcmpu 1, 5, 3
+; CHECK-NEXT:    crand 20, 6, 1
+; CHECK-NEXT:    cror 20, 5, 20
+; CHECK-NEXT:    bc 12, 20, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    fmr 6, 4
+; CHECK-NEXT:  .LBB0_2: # %entry
+; CHECK-NEXT:    fcmpu 0, 6, 2
+; CHECK-NEXT:    bc 12, 20, .LBB0_4
+; CHECK-NEXT:  # %bb.3: # %entry
+; CHECK-NEXT:    fmr 5, 3
+; CHECK-NEXT:  .LBB0_4: # %entry
+; CHECK-NEXT:    fcmpu 1, 5, 1
+; CHECK-NEXT:    crand 20, 6, 1
+; CHECK-NEXT:    cror 20, 5, 20
+; CHECK-NEXT:    bc 12, 20, .LBB0_6
+; CHECK-NEXT:  # %bb.5: # %entry
+; CHECK-NEXT:    fmr 6, 2
+; CHECK-NEXT:  .LBB0_6: # %entry
+; CHECK-NEXT:    fcmpu 0, 6, 8
+; CHECK-NEXT:    bc 12, 20, .LBB0_8
+; CHECK-NEXT:  # %bb.7: # %entry
+; CHECK-NEXT:    fmr 5, 1
+; CHECK-NEXT:  .LBB0_8: # %entry
+; CHECK-NEXT:    fcmpu 1, 5, 7
+; CHECK-NEXT:    crand 20, 6, 1
+; CHECK-NEXT:    cror 20, 5, 20
+; CHECK-NEXT:    bc 12, 20, .LBB0_10
+; CHECK-NEXT:  # %bb.9: # %entry
+; CHECK-NEXT:    fmr 5, 7
+; CHECK-NEXT:  .LBB0_10: # %entry
+; CHECK-NEXT:    bc 12, 20, .LBB0_12
+; CHECK-NEXT:  # %bb.11: # %entry
+; CHECK-NEXT:    fmr 6, 8
+; CHECK-NEXT:  .LBB0_12: # %entry
+; CHECK-NEXT:    fmr 1, 5
+; CHECK-NEXT:    fmr 2, 6
+; CHECK-NEXT:    blr
+entry:
+  %0 = call ppc_fp128 (ppc_fp128, ppc_fp128, ppc_fp128, ...) @llvm.ppc.maxfe(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d)
+  ret ppc_fp128 %0
+}
+
+declare double @llvm.ppc.maxfl(double %a, double %b, double %c, ...)
+define double @test_maxfl(double %a, double %b, double %c, double %d) {
+; CHECK-P9-LABEL: test_maxfl:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xsmaxcdp 0, 3, 2
+; CHECK-P9-NEXT:    xsmaxcdp 0, 0, 1
+; CHECK-P9-NEXT:    xsmaxcdp 1, 0, 4
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: test_maxfl:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xscmpudp 0, 3, 2
+; CHECK-P8-NEXT:    ble 0, .LBB1_4
+; CHECK-P8-NEXT:  # %bb.1: # %entry
+; CHECK-P8-NEXT:    xscmpudp 0, 3, 1
+; CHECK-P8-NEXT:    ble 0, .LBB1_5
+; CHECK-P8-NEXT:  .LBB1_2: # %entry
+; CHECK-P8-NEXT:    xscmpudp 0, 3, 4
+; CHECK-P8-NEXT:    ble 0, .LBB1_6
+; CHECK-P8-NEXT:  .LBB1_3: # %entry
+; CHECK-P8-NEXT:    fmr 1, 3
+; CHECK-P8-NEXT:    blr
+; CHECK-P8-NEXT:  .LBB1_4: # %entry
+; CHECK-P8-NEXT:    fmr 3, 2
+; CHECK-P8-NEXT:    xscmpudp 0, 3, 1
+; CHECK-P8-NEXT:    bgt 0, .LBB1_2
+; CHECK-P8-NEXT:  .LBB1_5: # %entry
+; CHECK-P8-NEXT:    fmr 3, 1
+; CHECK-P8-NEXT:    xscmpudp 0, 3, 4
+; CHECK-P8-NEXT:    bgt 0, .LBB1_3
+; CHECK-P8-NEXT:  .LBB1_6: # %entry
+; CHECK-P8-NEXT:    fmr 3, 4
+; CHECK-P8-NEXT:    fmr 1, 3
+; CHECK-P8-NEXT:    blr
+entry:
+  %0 = call double (double, double, double, ...) @llvm.ppc.maxfl(double %a, double %b, double %c, double %d)
+  ret double %0
+}
+
+declare float @llvm.ppc.maxfs(float %a, float %b, float %c, ...)
+define float @test_maxfs(float %a, float %b, float %c, float %d) {
+; CHECK-P9-LABEL: test_maxfs:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xsmaxcdp 0, 3, 2
+; CHECK-P9-NEXT:    xsmaxcdp 0, 0, 1
+; CHECK-P9-NEXT:    xsmaxcdp 1, 0, 4
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: test_maxfs:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    fcmpu 0, 3, 2
+; CHECK-P8-NEXT:    ble 0, .LBB2_4
+; CHECK-P8-NEXT:  # %bb.1: # %entry
+; CHECK-P8-NEXT:    fcmpu 0, 3, 1
+; CHECK-P8-NEXT:    ble 0, .LBB2_5
+; CHECK-P8-NEXT:  .LBB2_2: # %entry
+; CHECK-P8-NEXT:    fcmpu 0, 3, 4
+; CHECK-P8-NEXT:    ble 0, .LBB2_6
+; CHECK-P8-NEXT:  .LBB2_3: # %entry
+; CHECK-P8-NEXT:    fmr 1, 3
+; CHECK-P8-NEXT:    blr
+; CHECK-P8-NEXT:  .LBB2_4: # %entry
+; CHECK-P8-NEXT:    fmr 3, 2
+; CHECK-P8-NEXT:    fcmpu 0, 3, 1
+; CHECK-P8-NEXT:    bgt 0, .LBB2_2
+; CHECK-P8-NEXT:  .LBB2_5: # %entry
+; CHECK-P8-NEXT:    fmr 3, 1
+; CHECK-P8-NEXT:    fcmpu 0, 3, 4
+; CHECK-P8-NEXT:    bgt 0, .LBB2_3
+; CHECK-P8-NEXT:  .LBB2_6: # %entry
+; CHECK-P8-NEXT:    fmr 3, 4
+; CHECK-P8-NEXT:    fmr 1, 3
+; CHECK-P8-NEXT:    blr
+entry:
+  %0 = call float (float, float, float, ...) @llvm.ppc.maxfs(float %a, float %b, float %c, float %d)
+  ret float %0
+}
+
+declare ppc_fp128 @llvm.ppc.minfe(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ...)
+define ppc_fp128 @test_minfe(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) {
+; CHECK-LABEL: test_minfe:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fcmpu 0, 6, 4
+; CHECK-NEXT:    fcmpu 1, 5, 3
+; CHECK-NEXT:    crand 20, 6, 0
+; CHECK-NEXT:    cror 20, 4, 20
+; CHECK-NEXT:    bc 12, 20, .LBB3_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    fmr 6, 4
+; CHECK-NEXT:  .LBB3_2: # %entry
+; CHECK-NEXT:    fcmpu 0, 6, 2
+; CHECK-NEXT:    bc 12, 20, .LBB3_4
+; CHECK-NEXT:  # %bb.3: # %entry
+; CHECK-NEXT:    fmr 5, 3
+; CHECK-NEXT:  .LBB3_4: # %entry
+; CHECK-NEXT:    fcmpu 1, 5, 1
+; CHECK-NEXT:    crand 20, 6, 0
+; CHECK-NEXT:    cror 20, 4, 20
+; CHECK-NEXT:    bc 12, 20, .LBB3_6
+; CHECK-NEXT:  # %bb.5: # %entry
+; CHECK-NEXT:    fmr 6, 2
+; CHECK-NEXT:  .LBB3_6: # %entry
+; CHECK-NEXT:    fcmpu 0, 6, 8
+; CHECK-NEXT:    bc 12, 20, .LBB3_8
+; CHECK-NEXT:  # %bb.7: # %entry
+; CHECK-NEXT:    fmr 5, 1
+; CHECK-NEXT:  .LBB3_8: # %entry
+; CHECK-NEXT:    fcmpu 1, 5, 7
+; CHECK-NEXT:    crand 20, 6, 0
+; CHECK-NEXT:    cror 20, 4, 20
+; CHECK-NEXT:    bc 12, 20, .LBB3_10
+; CHECK-NEXT:  # %bb.9: # %entry
+; CHECK-NEXT:    fmr 5, 7
+; CHECK-NEXT:  .LBB3_10: # %entry
+; CHECK-NEXT:    bc 12, 20, .LBB3_12
+; CHECK-NEXT:  # %bb.11: # %entry
+; CHECK-NEXT:    fmr 6, 8
+; CHECK-NEXT:  .LBB3_12: # %entry
+; CHECK-NEXT:    fmr 1, 5
+; CHECK-NEXT:    fmr 2, 6
+; CHECK-NEXT:    blr
+entry:
+  %0 = call ppc_fp128 (ppc_fp128, ppc_fp128, ppc_fp128, ...) @llvm.ppc.minfe(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d)
+  ret ppc_fp128 %0
+}
+
+declare double @llvm.ppc.minfl(double %a, double %b, double %c, ...)
+define double @test_minfl(double %a, double %b, double %c, double %d) {
+; CHECK-P9-LABEL: test_minfl:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xsmincdp 0, 3, 2
+; CHECK-P9-NEXT:    xsmincdp 0, 0, 1
+; CHECK-P9-NEXT:    xsmincdp 1, 0, 4
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: test_minfl:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xscmpudp 0, 3, 2
+; CHECK-P8-NEXT:    bge 0, .LBB4_4
+; CHECK-P8-NEXT:  # %bb.1: # %entry
+; CHECK-P8-NEXT:    xscmpudp 0, 3, 1
+; CHECK-P8-NEXT:    bge 0, .LBB4_5
+; CHECK-P8-NEXT:  .LBB4_2: # %entry
+; CHECK-P8-NEXT:    xscmpudp 0, 3, 4
+; CHECK-P8-NEXT:    bge 0, .LBB4_6
+; CHECK-P8-NEXT:  .LBB4_3: # %entry
+; CHECK-P8-NEXT:    fmr 1, 3
+; CHECK-P8-NEXT:    blr
+; CHECK-P8-NEXT:  .LBB4_4: # %entry
+; CHECK-P8-NEXT:    fmr 3, 2
+; CHECK-P8-NEXT:    xscmpudp 0, 3, 1
+; CHECK-P8-NEXT:    blt 0, .LBB4_2
+; CHECK-P8-NEXT:  .LBB4_5: # %entry
+; CHECK-P8-NEXT:    fmr 3, 1
+; CHECK-P8-NEXT:    xscmpudp 0, 3, 4
+; CHECK-P8-NEXT:    blt 0, .LBB4_3
+; CHECK-P8-NEXT:  .LBB4_6: # %entry
+; CHECK-P8-NEXT:    fmr 3, 4
+; CHECK-P8-NEXT:    fmr 1, 3
+; CHECK-P8-NEXT:    blr
+entry:
+  %0 = call double (double, double, double, ...) @llvm.ppc.minfl(double %a, double %b, double %c, double %d)
+  ret double %0
+}
+
+declare float @llvm.ppc.minfs(float %a, float %b, float %c, ...)
+define float @test_minfs(float %a, float %b, float %c, float %d) {
+; CHECK-P9-LABEL: test_minfs:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xsmincdp 0, 3, 2
+; CHECK-P9-NEXT:    xsmincdp 0, 0, 1
+; CHECK-P9-NEXT:    xsmincdp 1, 0, 4
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: test_minfs:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    fcmpu 0, 3, 2
+; CHECK-P8-NEXT:    bge 0, .LBB5_4
+; CHECK-P8-NEXT:  # %bb.1: # %entry
+; CHECK-P8-NEXT:    fcmpu 0, 3, 1
+; CHECK-P8-NEXT:    bge 0, .LBB5_5
+; CHECK-P8-NEXT:  .LBB5_2: # %entry
+; CHECK-P8-NEXT:    fcmpu 0, 3, 4
+; CHECK-P8-NEXT:    bge 0, .LBB5_6
+; CHECK-P8-NEXT:  .LBB5_3: # %entry
+; CHECK-P8-NEXT:    fmr 1, 3
+; CHECK-P8-NEXT:    blr
+; CHECK-P8-NEXT:  .LBB5_4: # %entry
+; CHECK-P8-NEXT:    fmr 3, 2
+; CHECK-P8-NEXT:    fcmpu 0, 3, 1
+; CHECK-P8-NEXT:    blt 0, .LBB5_2
+; CHECK-P8-NEXT:  .LBB5_5: # %entry
+; CHECK-P8-NEXT:    fmr 3, 1
+; CHECK-P8-NEXT:    fcmpu 0, 3, 4
+; CHECK-P8-NEXT:    blt 0, .LBB5_3
+; CHECK-P8-NEXT:  .LBB5_6: # %entry
+; CHECK-P8-NEXT:    fmr 3, 4
+; CHECK-P8-NEXT:    fmr 1, 3
+; CHECK-P8-NEXT:    blr
+entry:
+  %0 = call float (float, float, float, ...) @llvm.ppc.minfs(float %a, float %b, float %c, float %d)
+  ret float %0
+}