[llvm] [NVPTX] Add support for integer min/max ReLU idiom (PR #151727)

Alex MacLean via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 1 09:41:04 PDT 2025


https://github.com/AlexMaclean created https://github.com/llvm/llvm-project/pull/151727

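(No description was provided with this pull request.)

Summary, derived from the patch below: adds NVPTX selection patterns that fold `smax(smin(a, b), 0)` and `smax(smax(a, b), 0)` into `min.relu` / `max.relu` for `s32` and `s16x2`, guarded by `hasPTX<80>`, `hasSM<90>` and `hasOptEnabled`; introduces a generic `zeroinitializer<vt>` pattern leaf that replaces `fpimm_positive_zero_v2f16` / `fpimm_positive_zero_v2bf16`; and regenerates `combine-min-max.ll` with `update_llc_test_checks.py`, now targeting `sm_90` / `ptx80`.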

>From 0995591031d3f6ecd1c4bc13ebb60c05a9893958 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Fri, 1 Aug 2025 02:10:16 +0000
Subject: [PATCH] [NVPTX] Add support for integer min/max ReLU idiom

---
 llvm/lib/Target/NVPTX/NVPTXInstrInfo.td    |  26 +-
 llvm/test/CodeGen/NVPTX/combine-min-max.ll | 733 ++++++++++++++++++---
 2 files changed, 650 insertions(+), 109 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index d8047d31ff6f0..e42fca6b3b2a1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -151,6 +151,8 @@ class OneUse2<SDPatternOperator operator>
 class fpimm_pos_inf<ValueType vt>
     : FPImmLeaf<vt, [{ return Imm.isPosInfinity(); }]>;
 
+class zeroinitializer<ValueType vt> : 
+  PatLeaf<(vt (bitconvert (!cast<ValueType>("i" # vt.Size) 0)))>;
 
 
 // Operands which can hold a Register or an Immediate.
@@ -789,6 +791,23 @@ def UMAX16x2 : I16x2<"max.u", umax>;
 def SMIN16x2 : I16x2<"min.s", smin>;
 def UMIN16x2 : I16x2<"min.u", umin>;
 
+let Predicates = [hasPTX<80>, hasSM<90>, hasOptEnabled] in {
+
+  def MIN_RELU_S32 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b),
+                     "min.relu.s32",
+                     [(set i32:$dst, (smax (smin i32:$a, i32:$b), 0))]>;
+  def MAX_RELU_S32 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b),
+                     "max.relu.s32",
+                     [(set i32:$dst, (smax (smax i32:$a, i32:$b), 0))]>;
+  def MIN_RELU_S16x2 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b),
+                     "min.relu.s16x2",
+                     [(set v2i16:$dst, (smax (smin v2i16:$a, v2i16:$b),
+                                             zeroinitializer<v2i16>))]>;
+  def MAX_RELU_S16x2 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a, B32:$b),
+                     "max.relu.s16x2",
+                     [(set v2i16:$dst, (smax (smax v2i16:$a, v2i16:$b),
+                                             zeroinitializer<v2i16>))]>;
+}
 
 //
 // Wide multiplication
@@ -2385,9 +2404,6 @@ def fpimm_any_zero : FPImmLeaf<fAny, [{
   return Imm.isZero();
 }]>;
 
-def fpimm_positive_zero_v2f16 : PatFrag<(ops), (v2f16 (bitconvert (i32 0)))>;
-def fpimm_positive_zero_v2bf16 : PatFrag<(ops), (v2bf16 (bitconvert (i32 0)))>;
-
 // Perform substitution if fma only has one use, and also if instruction has
 // nnan instruction flag or if the TM has NoNaNsFPMath
 def NVPTX_fma_oneuse_and_nnan : PatFrag<(ops node:$a, node:$b, node:$c),
@@ -2410,10 +2426,10 @@ class FMARELUInst<RegTyInfo t, bit allow_ftz, PatFrag zero_pat>
 
 let Predicates = [useFP16Math, hasPTX<70>, hasSM<80>] in {
   def FMARELU_F16 : FMARELUInst<F16RT, true, fpimm_any_zero>;
-  def FMARELU_F16X2 : FMARELUInst<F16X2RT, true, fpimm_positive_zero_v2f16>;
+  def FMARELU_F16X2 : FMARELUInst<F16X2RT, true, zeroinitializer<v2f16>>;
 }
 
 let Predicates = [hasBF16Math, hasPTX<70>, hasSM<80>] in {
   def FMARELU_BF16 : FMARELUInst<BF16RT, false, fpimm_any_zero>;
-  def FMARELU_BF16X2 : FMARELUInst<BF16X2RT, false, fpimm_positive_zero_v2bf16>;
+  def FMARELU_BF16X2 : FMARELUInst<BF16X2RT, false, zeroinitializer<v2bf16>>;
 }
diff --git a/llvm/test/CodeGen/NVPTX/combine-min-max.ll b/llvm/test/CodeGen/NVPTX/combine-min-max.ll
index 157c3cc6bd2e4..a18aa77a705b9 100644
--- a/llvm/test/CodeGen/NVPTX/combine-min-max.ll
+++ b/llvm/test/CodeGen/NVPTX/combine-min-max.ll
@@ -1,22 +1,35 @@
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O2 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O2 | %ptxas-verify %}
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -O3 | FileCheck %s
+; RUN: %if ptxas-12.0 %{ llc < %s -mcpu=sm_90 -mattr=+ptx80 -O3 | %ptxas-verify %}
+
+target triple = "nvptx64-nvidia-cuda"
 
 ; *************************************
 ; * Cases with no min/max
 
 define i32 @ab_eq_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ab_eq_i32
-; CHECK-NOT: min
-; CHECK-NOT: max
+; CHECK-LABEL: ab_eq_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ab_eq_i32_param_1];
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp eq i32 %a, %b
   %sel = select i1 %cmp, i32 %a, i32 %b
   ret i32 %sel
 }
 
 define i64 @ab_ne_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ab_ne_i64
-; CHECK-NOT: min
-; CHECK-NOT: max
+; CHECK-LABEL: ab_ne_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ab_ne_i64_param_1];
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd1;
+; CHECK-NEXT:    ret;
   %cmp = icmp ne i64 %a, %b
   %sel = select i1 %cmp, i64 %b, i64 %a
   ret i64 %sel
@@ -27,32 +40,72 @@ define i64 @ab_ne_i64(i64 %a, i64 %b) {
 
 ; *** ab, unsigned, i16
 define i16 @ab_ugt_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ab_ugt_i16
-; CHECK: max.u16
+; CHECK-LABEL: ab_ugt_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ab_ugt_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ab_ugt_i16_param_1];
+; CHECK-NEXT:    max.u16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp ugt i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
 define i16 @ab_uge_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ab_uge_i16
-; CHECK: max.u16
+; CHECK-LABEL: ab_uge_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ab_uge_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ab_uge_i16_param_1];
+; CHECK-NEXT:    max.u16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp uge i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
 define i16 @ab_ult_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ab_ult_i16
-; CHECK: min.u16
+; CHECK-LABEL: ab_ult_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ab_ult_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ab_ult_i16_param_1];
+; CHECK-NEXT:    min.u16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp ult i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
 define i16 @ab_ule_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ab_ule_i16
-; CHECK: min.u16
+; CHECK-LABEL: ab_ule_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ab_ule_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ab_ule_i16_param_1];
+; CHECK-NEXT:    min.u16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp ule i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
@@ -60,32 +113,72 @@ define i16 @ab_ule_i16(i16 %a, i16 %b) {
 
 ; *** ab, signed, i16
 define i16 @ab_sgt_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ab_sgt_i16
-; CHECK: max.s16
+; CHECK-LABEL: ab_sgt_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ab_sgt_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ab_sgt_i16_param_1];
+; CHECK-NEXT:    max.s16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp sgt i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
 define i16 @ab_sge_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ab_sge_i16
-; CHECK: max.s16
+; CHECK-LABEL: ab_sge_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ab_sge_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ab_sge_i16_param_1];
+; CHECK-NEXT:    max.s16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp sge i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
 define i16 @ab_slt_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ab_slt_i16
-; CHECK: min.s16
+; CHECK-LABEL: ab_slt_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ab_slt_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ab_slt_i16_param_1];
+; CHECK-NEXT:    min.s16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp slt i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
 }
 
 define i16 @ab_sle_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ab_sle_i16
-; CHECK: min.s16
+; CHECK-LABEL: ab_sle_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ab_sle_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ab_sle_i16_param_1];
+; CHECK-NEXT:    min.s16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp sle i16 %a, %b
   %sel = select i1 %cmp, i16 %a, i16 %b
   ret i16 %sel
@@ -93,32 +186,72 @@ define i16 @ab_sle_i16(i16 %a, i16 %b) {
 
 ; *** ba, unsigned, i16
 define i16 @ba_ugt_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ba_ugt_i16
-; CHECK: min.u16
+; CHECK-LABEL: ba_ugt_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ba_ugt_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ba_ugt_i16_param_1];
+; CHECK-NEXT:    min.u16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp ugt i16 %a, %b
   %sel = select i1 %cmp, i16 %b, i16 %a
   ret i16 %sel
 }
 
 define i16 @ba_uge_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ba_uge_i16
-; CHECK: min.u16
+; CHECK-LABEL: ba_uge_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ba_uge_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ba_uge_i16_param_1];
+; CHECK-NEXT:    min.u16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp uge i16 %a, %b
   %sel = select i1 %cmp, i16 %b, i16 %a
   ret i16 %sel
 }
 
 define i16 @ba_ult_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ba_ult_i16
-; CHECK: max.u16
+; CHECK-LABEL: ba_ult_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ba_ult_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ba_ult_i16_param_1];
+; CHECK-NEXT:    max.u16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp ult i16 %a, %b
   %sel = select i1 %cmp, i16 %b, i16 %a
   ret i16 %sel
 }
 
 define i16 @ba_ule_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ba_ule_i16
-; CHECK: max.u16
+; CHECK-LABEL: ba_ule_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ba_ule_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ba_ule_i16_param_1];
+; CHECK-NEXT:    max.u16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp ule i16 %a, %b
   %sel = select i1 %cmp, i16 %b, i16 %a
   ret i16 %sel
@@ -126,32 +259,72 @@ define i16 @ba_ule_i16(i16 %a, i16 %b) {
 
 ; *** ba, signed, i16
 define i16 @ba_sgt_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ba_sgt_i16
-; CHECK: min.s16
+; CHECK-LABEL: ba_sgt_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ba_sgt_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ba_sgt_i16_param_1];
+; CHECK-NEXT:    min.s16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp sgt i16 %a, %b
   %sel = select i1 %cmp, i16 %b, i16 %a
   ret i16 %sel
 }
 
 define i16 @ba_sge_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ba_sge_i16
-; CHECK: min.s16
+; CHECK-LABEL: ba_sge_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ba_sge_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ba_sge_i16_param_1];
+; CHECK-NEXT:    min.s16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp sge i16 %a, %b
   %sel = select i1 %cmp, i16 %b, i16 %a
   ret i16 %sel
 }
 
 define i16 @ba_slt_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ba_slt_i16
-; CHECK: max.s16
+; CHECK-LABEL: ba_slt_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ba_slt_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ba_slt_i16_param_1];
+; CHECK-NEXT:    max.s16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp slt i16 %a, %b
   %sel = select i1 %cmp, i16 %b, i16 %a
   ret i16 %sel
 }
 
 define i16 @ba_sle_i16(i16 %a, i16 %b) {
-; CHECK-LABEL: @ba_sle_i16
-; CHECK: max.s16
+; CHECK-LABEL: ba_sle_i16(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b16 %rs<4>;
+; CHECK-NEXT:    .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b16 %rs1, [ba_sle_i16_param_0];
+; CHECK-NEXT:    ld.param.b16 %rs2, [ba_sle_i16_param_1];
+; CHECK-NEXT:    max.s16 %rs3, %rs1, %rs2;
+; CHECK-NEXT:    cvt.u32.u16 %r1, %rs3;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT:    ret;
   %cmp = icmp sle i16 %a, %b
   %sel = select i1 %cmp, i16 %b, i16 %a
   ret i16 %sel
@@ -162,32 +335,64 @@ define i16 @ba_sle_i16(i16 %a, i16 %b) {
 
 ; *** ab, unsigned, i32
 define i32 @ab_ugt_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ab_ugt_i32
-; CHECK: max.u32
+; CHECK-LABEL: ab_ugt_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ab_ugt_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ab_ugt_i32_param_1];
+; CHECK-NEXT:    max.u32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp ugt i32 %a, %b
   %sel = select i1 %cmp, i32 %a, i32 %b
   ret i32 %sel
 }
 
 define i32 @ab_uge_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ab_uge_i32
-; CHECK: max.u32
+; CHECK-LABEL: ab_uge_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ab_uge_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ab_uge_i32_param_1];
+; CHECK-NEXT:    max.u32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp uge i32 %a, %b
   %sel = select i1 %cmp, i32 %a, i32 %b
   ret i32 %sel
 }
 
 define i32 @ab_ult_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ab_ult_i32
-; CHECK: min.u32
+; CHECK-LABEL: ab_ult_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ab_ult_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ab_ult_i32_param_1];
+; CHECK-NEXT:    min.u32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp ult i32 %a, %b
   %sel = select i1 %cmp, i32 %a, i32 %b
   ret i32 %sel
 }
 
 define i32 @ab_ule_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ab_ule_i32
-; CHECK: min.u32
+; CHECK-LABEL: ab_ule_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ab_ule_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ab_ule_i32_param_1];
+; CHECK-NEXT:    min.u32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp ule i32 %a, %b
   %sel = select i1 %cmp, i32 %a, i32 %b
   ret i32 %sel
@@ -195,32 +400,64 @@ define i32 @ab_ule_i32(i32 %a, i32 %b) {
 
 ; *** ab, signed, i32
 define i32 @ab_sgt_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ab_sgt_i32
-; CHECK: max.s32
+; CHECK-LABEL: ab_sgt_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ab_sgt_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ab_sgt_i32_param_1];
+; CHECK-NEXT:    max.s32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp sgt i32 %a, %b
   %sel = select i1 %cmp, i32 %a, i32 %b
   ret i32 %sel
 }
 
 define i32 @ab_sge_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ab_sge_i32
-; CHECK: max.s32
+; CHECK-LABEL: ab_sge_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ab_sge_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ab_sge_i32_param_1];
+; CHECK-NEXT:    max.s32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp sge i32 %a, %b
   %sel = select i1 %cmp, i32 %a, i32 %b
   ret i32 %sel
 }
 
 define i32 @ab_slt_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ab_slt_i32
-; CHECK: min.s32
+; CHECK-LABEL: ab_slt_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ab_slt_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ab_slt_i32_param_1];
+; CHECK-NEXT:    min.s32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp slt i32 %a, %b
   %sel = select i1 %cmp, i32 %a, i32 %b
   ret i32 %sel
 }
 
 define i32 @ab_sle_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ab_sle_i32
-; CHECK: min.s32
+; CHECK-LABEL: ab_sle_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ab_sle_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ab_sle_i32_param_1];
+; CHECK-NEXT:    min.s32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp sle i32 %a, %b
   %sel = select i1 %cmp, i32 %a, i32 %b
   ret i32 %sel
@@ -228,32 +465,64 @@ define i32 @ab_sle_i32(i32 %a, i32 %b) {
 
 ; *** ba, unsigned, i32
 define i32 @ba_ugt_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ba_ugt_i32
-; CHECK: min.u32
+; CHECK-LABEL: ba_ugt_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ba_ugt_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ba_ugt_i32_param_1];
+; CHECK-NEXT:    min.u32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp ugt i32 %a, %b
   %sel = select i1 %cmp, i32 %b, i32 %a
   ret i32 %sel
 }
 
 define i32 @ba_uge_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ba_uge_i32
-; CHECK: min.u32
+; CHECK-LABEL: ba_uge_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ba_uge_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ba_uge_i32_param_1];
+; CHECK-NEXT:    min.u32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp uge i32 %a, %b
   %sel = select i1 %cmp, i32 %b, i32 %a
   ret i32 %sel
 }
 
 define i32 @ba_ult_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ba_ult_i32
-; CHECK: max.u32
+; CHECK-LABEL: ba_ult_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ba_ult_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ba_ult_i32_param_1];
+; CHECK-NEXT:    max.u32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp ult i32 %a, %b
   %sel = select i1 %cmp, i32 %b, i32 %a
   ret i32 %sel
 }
 
 define i32 @ba_ule_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ba_ule_i32
-; CHECK: max.u32
+; CHECK-LABEL: ba_ule_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ba_ule_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ba_ule_i32_param_1];
+; CHECK-NEXT:    max.u32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp ule i32 %a, %b
   %sel = select i1 %cmp, i32 %b, i32 %a
   ret i32 %sel
@@ -261,32 +530,64 @@ define i32 @ba_ule_i32(i32 %a, i32 %b) {
 
 ; *** ba, signed, i32
 define i32 @ba_sgt_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ba_sgt_i32
-; CHECK: min.s32
+; CHECK-LABEL: ba_sgt_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ba_sgt_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ba_sgt_i32_param_1];
+; CHECK-NEXT:    min.s32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp sgt i32 %a, %b
   %sel = select i1 %cmp, i32 %b, i32 %a
   ret i32 %sel
 }
 
 define i32 @ba_sge_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ba_sge_i32
-; CHECK: min.s32
+; CHECK-LABEL: ba_sge_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ba_sge_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ba_sge_i32_param_1];
+; CHECK-NEXT:    min.s32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp sge i32 %a, %b
   %sel = select i1 %cmp, i32 %b, i32 %a
   ret i32 %sel
 }
 
 define i32 @ba_slt_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ba_slt_i32
-; CHECK: max.s32
+; CHECK-LABEL: ba_slt_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ba_slt_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ba_slt_i32_param_1];
+; CHECK-NEXT:    max.s32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp slt i32 %a, %b
   %sel = select i1 %cmp, i32 %b, i32 %a
   ret i32 %sel
 }
 
 define i32 @ba_sle_i32(i32 %a, i32 %b) {
-; CHECK-LABEL: @ba_sle_i32
-; CHECK: max.s32
+; CHECK-LABEL: ba_sle_i32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [ba_sle_i32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [ba_sle_i32_param_1];
+; CHECK-NEXT:    max.s32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
   %cmp = icmp sle i32 %a, %b
   %sel = select i1 %cmp, i32 %b, i32 %a
   ret i32 %sel
@@ -297,32 +598,64 @@ define i32 @ba_sle_i32(i32 %a, i32 %b) {
 
 ; *** ab, unsigned, i64
 define i64 @ab_ugt_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ab_ugt_i64
-; CHECK: max.u64
+; CHECK-LABEL: ab_ugt_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ab_ugt_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ab_ugt_i64_param_1];
+; CHECK-NEXT:    max.u64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp ugt i64 %a, %b
   %sel = select i1 %cmp, i64 %a, i64 %b
   ret i64 %sel
 }
 
 define i64 @ab_uge_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ab_uge_i64
-; CHECK: max.u64
+; CHECK-LABEL: ab_uge_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ab_uge_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ab_uge_i64_param_1];
+; CHECK-NEXT:    max.u64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp uge i64 %a, %b
   %sel = select i1 %cmp, i64 %a, i64 %b
   ret i64 %sel
 }
 
 define i64 @ab_ult_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ab_ult_i64
-; CHECK: min.u64
+; CHECK-LABEL: ab_ult_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ab_ult_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ab_ult_i64_param_1];
+; CHECK-NEXT:    min.u64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp ult i64 %a, %b
   %sel = select i1 %cmp, i64 %a, i64 %b
   ret i64 %sel
 }
 
 define i64 @ab_ule_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ab_ule_i64
-; CHECK: min.u64
+; CHECK-LABEL: ab_ule_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ab_ule_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ab_ule_i64_param_1];
+; CHECK-NEXT:    min.u64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp ule i64 %a, %b
   %sel = select i1 %cmp, i64 %a, i64 %b
   ret i64 %sel
@@ -330,32 +663,64 @@ define i64 @ab_ule_i64(i64 %a, i64 %b) {
 
 ; *** ab, signed, i64
 define i64 @ab_sgt_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ab_sgt_i64
-; CHECK: max.s64
+; CHECK-LABEL: ab_sgt_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ab_sgt_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ab_sgt_i64_param_1];
+; CHECK-NEXT:    max.s64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp sgt i64 %a, %b
   %sel = select i1 %cmp, i64 %a, i64 %b
   ret i64 %sel
 }
 
 define i64 @ab_sge_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ab_sge_i64
-; CHECK: max.s64
+; CHECK-LABEL: ab_sge_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ab_sge_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ab_sge_i64_param_1];
+; CHECK-NEXT:    max.s64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp sge i64 %a, %b
   %sel = select i1 %cmp, i64 %a, i64 %b
   ret i64 %sel
 }
 
 define i64 @ab_slt_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ab_slt_i64
-; CHECK: min.s64
+; CHECK-LABEL: ab_slt_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ab_slt_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ab_slt_i64_param_1];
+; CHECK-NEXT:    min.s64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp slt i64 %a, %b
   %sel = select i1 %cmp, i64 %a, i64 %b
   ret i64 %sel
 }
 
 define i64 @ab_sle_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ab_sle_i64
-; CHECK: min.s64
+; CHECK-LABEL: ab_sle_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ab_sle_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ab_sle_i64_param_1];
+; CHECK-NEXT:    min.s64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp sle i64 %a, %b
   %sel = select i1 %cmp, i64 %a, i64 %b
   ret i64 %sel
@@ -363,32 +728,64 @@ define i64 @ab_sle_i64(i64 %a, i64 %b) {
 
 ; *** ba, unsigned, i64
 define i64 @ba_ugt_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ba_ugt_i64
-; CHECK: min.u64
+; CHECK-LABEL: ba_ugt_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ba_ugt_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ba_ugt_i64_param_1];
+; CHECK-NEXT:    min.u64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp ugt i64 %a, %b
   %sel = select i1 %cmp, i64 %b, i64 %a
   ret i64 %sel
 }
 
 define i64 @ba_uge_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ba_uge_i64
-; CHECK: min.u64
+; CHECK-LABEL: ba_uge_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ba_uge_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ba_uge_i64_param_1];
+; CHECK-NEXT:    min.u64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp uge i64 %a, %b
   %sel = select i1 %cmp, i64 %b, i64 %a
   ret i64 %sel
 }
 
 define i64 @ba_ult_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ba_ult_i64
-; CHECK: max.u64
+; CHECK-LABEL: ba_ult_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ba_ult_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ba_ult_i64_param_1];
+; CHECK-NEXT:    max.u64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp ult i64 %a, %b
   %sel = select i1 %cmp, i64 %b, i64 %a
   ret i64 %sel
 }
 
 define i64 @ba_ule_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ba_ule_i64
-; CHECK: max.u64
+; CHECK-LABEL: ba_ule_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ba_ule_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ba_ule_i64_param_1];
+; CHECK-NEXT:    max.u64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp ule i64 %a, %b
   %sel = select i1 %cmp, i64 %b, i64 %a
   ret i64 %sel
@@ -396,33 +793,161 @@ define i64 @ba_ule_i64(i64 %a, i64 %b) {
 
 ; *** ba, signed, i64
 define i64 @ba_sgt_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ba_sgt_i64
-; CHECK: min.s64
+; CHECK-LABEL: ba_sgt_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ba_sgt_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ba_sgt_i64_param_1];
+; CHECK-NEXT:    min.s64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp sgt i64 %a, %b
   %sel = select i1 %cmp, i64 %b, i64 %a
   ret i64 %sel
 }
 
 define i64 @ba_sge_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ba_sge_i64
-; CHECK: min.s64
+; CHECK-LABEL: ba_sge_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ba_sge_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ba_sge_i64_param_1];
+; CHECK-NEXT:    min.s64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp sge i64 %a, %b
   %sel = select i1 %cmp, i64 %b, i64 %a
   ret i64 %sel
 }
 
 define i64 @ba_slt_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ba_slt_i64
-; CHECK: max.s64
+; CHECK-LABEL: ba_slt_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ba_slt_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ba_slt_i64_param_1];
+; CHECK-NEXT:    max.s64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp slt i64 %a, %b
   %sel = select i1 %cmp, i64 %b, i64 %a
   ret i64 %sel
 }
 
 define i64 @ba_sle_i64(i64 %a, i64 %b) {
-; CHECK-LABEL: @ba_sle_i64
-; CHECK: max.s64
+; CHECK-LABEL: ba_sle_i64(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b64 %rd1, [ba_sle_i64_param_0];
+; CHECK-NEXT:    ld.param.b64 %rd2, [ba_sle_i64_param_1];
+; CHECK-NEXT:    max.s64 %rd3, %rd1, %rd2;
+; CHECK-NEXT:    st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT:    ret;
   %cmp = icmp sle i64 %a, %b
   %sel = select i1 %cmp, i64 %b, i64 %a
   ret i64 %sel
 }
+
+define i32 @min_relu_s32(i32 %a, i32 %b) {
+; CHECK-LABEL: min_relu_s32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [min_relu_s32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [min_relu_s32_param_1];
+; CHECK-NEXT:    min.relu.s32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %min = call i32 @llvm.smin.i32(i32 %a, i32 %b)
+  %max = call i32 @llvm.smax.i32(i32 %min, i32 0) ; smax with 0 clamps negatives: the ReLU step
+  ret i32 %max
+}
+
+define i32 @max_relu_s32(i32 %a, i32 %b) {
+; CHECK-LABEL: max_relu_s32(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [max_relu_s32_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [max_relu_s32_param_1];
+; CHECK-NEXT:    max.relu.s32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %max1 = call i32 @llvm.smax.i32(i32 %a, i32 %b)
+  %max2 = call i32 @llvm.smax.i32(i32 %max1, i32 0) ; smax with 0 clamps negatives: the ReLU step
+  ret i32 %max2
+}
+
+define i32 @max_relu_s32_v2(i32 %a, i32 %b) {
+; CHECK-LABEL: max_relu_s32_v2(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [max_relu_s32_v2_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [max_relu_s32_v2_param_1];
+; CHECK-NEXT:    max.relu.s32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %max2 = call i32 @llvm.smax.i32(i32 %a, i32 0) ; zero clamp applied to %a first in this variant
+  %max1 = call i32 @llvm.smax.i32(i32 %max2, i32 %b) ; same value as smax(smax(%a, %b), 0)
+  ret i32 %max1
+}
+
+define <2 x i16> @min_relu_s16x2(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: min_relu_s16x2(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [min_relu_s16x2_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [min_relu_s16x2_param_1];
+; CHECK-NEXT:    min.relu.s16x2 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %min = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %a, <2 x i16> %b)
+  %max = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %min, <2 x i16> zeroinitializer) ; per-lane clamp at 0: the ReLU step
+  ret <2 x i16> %max
+}
+
+define <2 x i16> @max_relu_s16x2(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: max_relu_s16x2(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [max_relu_s16x2_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [max_relu_s16x2_param_1];
+; CHECK-NEXT:    max.relu.s16x2 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %max1 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %a, <2 x i16> %b)
+  %max2 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %max1, <2 x i16> zeroinitializer) ; per-lane clamp at 0: the ReLU step
+  ret <2 x i16> %max2
+}
+
+define <2 x i16> @max_relu_s16x2_v2(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: max_relu_s16x2_v2(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [max_relu_s16x2_v2_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [max_relu_s16x2_v2_param_1];
+; CHECK-NEXT:    max.relu.s16x2 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %max2 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %a, <2 x i16> zeroinitializer) ; zero clamp applied to %a first in this variant
+  %max1 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %max2, <2 x i16> %b) ; same value as smax(smax(%a, %b), 0)
+  ret <2 x i16> %max1
+}



More information about the llvm-commits mailing list