[llvm] r246107 - [NVPTX] Let NVPTX backend detect integer min and max patterns.

Fri Aug 28 09:46:21 PDT 2015

AFAIK, there is no generic integer min/max pattern matching so far, but I
agree that it would be better if there were and if NVPTX were using the
generic min/max nodes. See my last comment in the review.

On Fri, Aug 28, 2015 at 8:10 AM, Tom Stellard <tom at stellard.net> wrote:

> On Wed, Aug 26, 2015 at 11:22:02PM -0000, Bjarke Hammersholt Roune via
> llvm-commits wrote:
> > Author: broune
> > Date: Wed Aug 26 18:22:02 2015
> > New Revision: 246107
> >
> > URL: http://llvm.org/viewvc/llvm-project?rev=246107&view=rev
> > Log:
> > [NVPTX] Let NVPTX backend detect integer min and max patterns.
> >
> > Summary:
> > Let NVPTX backend detect integer min and max patterns during isel and
> emit intrinsics that enable hardware support.
> >
>
> Now that there are integer min/max SDNodes nvptx should mark those as legal
> so it can use the generic patterns.
>
> -Tom
>
> >
> > Reviewers: jholewinski, meheff, jingyue
> >
> > Subscribers: arsenm, llvm-commits, meheff, jingyue, eliben, jholewinski
> >
> > Differential Revision: http://reviews.llvm.org/D12377
> >
> > Added:
> >     llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll
> > Modified:
> >     llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
> >
> > Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp?rev=246107&r1=246106&r2=246107&view=diff
> >
> ==============================================================================
> > --- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp (original)
> > +++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp Wed Aug 26
> 18:22:02 2015
> > @@ -279,6 +279,7 @@ NVPTXTargetLowering::NVPTXTargetLowering
> >    setTargetDAGCombine(ISD::FADD);
> >    setTargetDAGCombine(ISD::MUL);
> >    setTargetDAGCombine(ISD::SHL);
> > +  setTargetDAGCombine(ISD::SELECT);
> >
> >    // Now deduce the information based on the above mentioned
> >    // actions
> > @@ -4059,6 +4060,67 @@ static SDValue PerformANDCombine(SDNode
> >    return SDValue();
> >  }
> >
> > +static SDValue PerformSELECTCombine(SDNode *N,
> > +                                    TargetLowering::DAGCombinerInfo
> &DCI) {
> > +  // Currently this detects patterns for integer min and max and
> > +  // lowers them to PTX-specific intrinsics that enable hardware
> > +  // support.
> > +
> > +  const SDValue Cond = N->getOperand(0);
> > +  if (Cond.getOpcode() != ISD::SETCC) return SDValue();
> > +
> > +  const SDValue LHS = Cond.getOperand(0);
> > +  const SDValue RHS = Cond.getOperand(1);
> > +  const SDValue True = N->getOperand(1);
> > +  const SDValue False = N->getOperand(2);
> > +  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
> > +    return SDValue();
> > +
> > +  const EVT VT = N->getValueType(0);
> > +  if (VT != MVT::i32 && VT != MVT::i64) return SDValue();
> > +
> > +  const ISD::CondCode CC =
> cast<CondCodeSDNode>(Cond.getOperand(2))->get();
> > +  SDValue Larger;  // The larger of LHS and RHS when condition is true.
> > +  switch (CC) {
> > +    case ISD::SETULT:
> > +    case ISD::SETULE:
> > +    case ISD::SETLT:
> > +    case ISD::SETLE:
> > +      Larger = RHS;
> > +      break;
> > +
> > +    case ISD::SETGT:
> > +    case ISD::SETGE:
> > +    case ISD::SETUGT:
> > +    case ISD::SETUGE:
> > +      Larger = LHS;
> > +      break;
> > +
> > +    default:
> > +      return SDValue();
> > +  }
> > +  const bool IsMax = (Larger == True);
> > +  const bool IsSigned = ISD::isSignedIntSetCC(CC);
> > +
> > +  unsigned IntrinsicId;
> > +  if (VT == MVT::i32) {
> > +    if (IsSigned)
> > +      IntrinsicId = IsMax ? Intrinsic::nvvm_max_i :
> Intrinsic::nvvm_min_i;
> > +    else
> > +      IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui :
> Intrinsic::nvvm_min_ui;
> > +  } else {
> > +    assert(VT == MVT::i64);
> > +    if (IsSigned)
> > +      IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll :
> Intrinsic::nvvm_min_ll;
> > +    else
> > +      IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull :
> Intrinsic::nvvm_min_ull;
> > +  }
> > +
> > +  SDLoc DL(N);
> > +  return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
> > +                         DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS,
> RHS);
> > +}
> > +
> >  enum OperandSignedness {
> >    Signed = 0,
> >    Unsigned,
> > @@ -4240,6 +4302,8 @@ SDValue NVPTXTargetLowering::PerformDAGC
> >        return PerformSHLCombine(N, DCI, OptLevel);
> >      case ISD::AND:
> >        return PerformANDCombine(N, DCI);
> > +    case ISD::SELECT:
> > +      return PerformSELECTCombine(N, DCI);
> >    }
> >    return SDValue();
> >  }
> >
> > Added: llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll
> > URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll?rev=246107&view=auto
> >
> ==============================================================================
> > --- llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll (added)
> > +++ llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll Wed Aug 26 18:22:02
> 2015
> > @@ -0,0 +1,307 @@
> > +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O2 | FileCheck %s
> > +
> > +; *************************************
> > +; * Cases with no min/max
> > +
> > +define i32 @ab_eq_i32(i32 %a, i32 %b) {
> > +; LABEL: @ab_slt_i32
> > +; CHECK-NOT: min
> > +; CHECK-NOT: max
> > +  %cmp = icmp eq i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %a, i32 %b
> > +  ret i32 %sel
> > +}
> > +
> > +define i64 @ba_ne_i64(i64 %a, i64 %b) {
> > +; LABEL: @ab_ne_i64
> > +; CHECK-NOT: min
> > +; CHECK-NOT: max
> > +  %cmp = icmp ne i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %b, i64 %a
> > +  ret i64 %sel
> > +}
> > +
> > +; PTX does have e.g. max.s16, but at least as of Kepler (sm_3x) that
> > +; gets compiled to SASS that converts the 16 bit parameters to 32 bit
> > +; before using a 32 bit instruction. That is probably not a win and
> > +; NVCC 7.5 does not emit 16 bit min/max either, presumably for that
> > +; reason.
> > +define i16 @ab_ugt_i16(i16 %a, i16 %b) {
> > +; LABEL: @ab_ugt_i16
> > +; CHECK-NOT: min
> > +; CHECK-NOT: max
> > +  %cmp = icmp ugt i16 %a, %b
> > +  %sel = select i1 %cmp, i16 %a, i16 %b
> > +  ret i16 %sel
> > +}
> > +
> > +
> > +; *************************************
> > +; * All variations with i32
> > +
> > +; *** ab, unsigned, i32
> > +define i32 @ab_ugt_i32(i32 %a, i32 %b) {
> > +; LABEL: @ab_ugt_i32
> > +; CHECK: max.u32
> > +  %cmp = icmp ugt i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %a, i32 %b
> > +  ret i32 %sel
> > +}
> > +
> > +define i32 @ab_uge_i32(i32 %a, i32 %b) {
> > +; LABEL: @ab_uge_i32
> > +; CHECK: max.u32
> > +  %cmp = icmp uge i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %a, i32 %b
> > +  ret i32 %sel
> > +}
> > +
> > +define i32 @ab_ult_i32(i32 %a, i32 %b) {
> > +; LABEL: @ab_ult_i32
> > +; CHECK: min.u32
> > +  %cmp = icmp ult i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %a, i32 %b
> > +  ret i32 %sel
> > +}
> > +
> > +define i32 @ab_ule_i32(i32 %a, i32 %b) {
> > +; LABEL: @ab_ule_i32
> > +; CHECK: min.u32
> > +  %cmp = icmp ule i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %a, i32 %b
> > +  ret i32 %sel
> > +}
> > +
> > +; *** ab, signed, i32
> > +define i32 @ab_sgt_i32(i32 %a, i32 %b) {
> > +; LABEL: @ab_ugt_i32
> > +; CHECK: max.s32
> > +  %cmp = icmp sgt i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %a, i32 %b
> > +  ret i32 %sel
> > +}
> > +
> > +define i32 @ab_sge_i32(i32 %a, i32 %b) {
> > +; LABEL: @ab_sge_i32
> > +; CHECK: max.s32
> > +  %cmp = icmp sge i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %a, i32 %b
> > +  ret i32 %sel
> > +}
> > +
> > +define i32 @ab_slt_i32(i32 %a, i32 %b) {
> > +; LABEL: @ab_slt_i32
> > +; CHECK: min.s32
> > +  %cmp = icmp slt i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %a, i32 %b
> > +  ret i32 %sel
> > +}
> > +
> > +define i32 @ab_sle_i32(i32 %a, i32 %b) {
> > +; LABEL: @ab_sle_i32
> > +; CHECK: min.s32
> > +  %cmp = icmp sle i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %a, i32 %b
> > +  ret i32 %sel
> > +}
> > +
> > +; *** ba, unsigned, i32
> > +define i32 @ba_ugt_i32(i32 %a, i32 %b) {
> > +; LABEL: @ba_ugt_i32
> > +; CHECK: min.u32
> > +  %cmp = icmp ugt i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %b, i32 %a
> > +  ret i32 %sel
> > +}
> > +
> > +define i32 @ba_uge_i32(i32 %a, i32 %b) {
> > +; LABEL: @ba_uge_i32
> > +; CHECK: min.u32
> > +  %cmp = icmp uge i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %b, i32 %a
> > +  ret i32 %sel
> > +}
> > +
> > +define i32 @ba_ult_i32(i32 %a, i32 %b) {
> > +; LABEL: @ba_ult_i32
> > +; CHECK: max.u32
> > +  %cmp = icmp ult i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %b, i32 %a
> > +  ret i32 %sel
> > +}
> > +
> > +define i32 @ba_ule_i32(i32 %a, i32 %b) {
> > +; LABEL: @ba_ule_i32
> > +; CHECK: max.u32
> > +  %cmp = icmp ule i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %b, i32 %a
> > +  ret i32 %sel
> > +}
> > +
> > +; *** ba, signed, i32
> > +define i32 @ba_sgt_i32(i32 %a, i32 %b) {
> > +; LBAEL: @ba_ugt_i32
> > +; CHECK: min.s32
> > +  %cmp = icmp sgt i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %b, i32 %a
> > +  ret i32 %sel
> > +}
> > +
> > +define i32 @ba_sge_i32(i32 %a, i32 %b) {
> > +; LABEL: @ba_sge_i32
> > +; CHECK: min.s32
> > +  %cmp = icmp sge i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %b, i32 %a
> > +  ret i32 %sel
> > +}
> > +
> > +define i32 @ba_slt_i32(i32 %a, i32 %b) {
> > +; LABEL: @ba_slt_i32
> > +; CHECK: max.s32
> > +  %cmp = icmp slt i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %b, i32 %a
> > +  ret i32 %sel
> > +}
> > +
> > +define i32 @ba_sle_i32(i32 %a, i32 %b) {
> > +; LABEL: @ba_sle_i32
> > +; CHECK: max.s32
> > +  %cmp = icmp sle i32 %a, %b
> > +  %sel = select i1 %cmp, i32 %b, i32 %a
> > +  ret i32 %sel
> > +}
> > +
> > +; *************************************
> > +; * All variations with i64
> > +
> > +; *** ab, unsigned, i64
> > +define i64 @ab_ugt_i64(i64 %a, i64 %b) {
> > +; LABEL: @ab_ugt_i64
> > +; CHECK: max.u64
> > +  %cmp = icmp ugt i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %a, i64 %b
> > +  ret i64 %sel
> > +}
> > +
> > +define i64 @ab_uge_i64(i64 %a, i64 %b) {
> > +; LABEL: @ab_uge_i64
> > +; CHECK: max.u64
> > +  %cmp = icmp uge i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %a, i64 %b
> > +  ret i64 %sel
> > +}
> > +
> > +define i64 @ab_ult_i64(i64 %a, i64 %b) {
> > +; LABEL: @ab_ult_i64
> > +; CHECK: min.u64
> > +  %cmp = icmp ult i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %a, i64 %b
> > +  ret i64 %sel
> > +}
> > +
> > +define i64 @ab_ule_i64(i64 %a, i64 %b) {
> > +; LABEL: @ab_ule_i64
> > +; CHECK: min.u64
> > +  %cmp = icmp ule i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %a, i64 %b
> > +  ret i64 %sel
> > +}
> > +
> > +; *** ab, signed, i64
> > +define i64 @ab_sgt_i64(i64 %a, i64 %b) {
> > +; LABEL: @ab_ugt_i64
> > +; CHECK: max.s64
> > +  %cmp = icmp sgt i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %a, i64 %b
> > +  ret i64 %sel
> > +}
> > +
> > +define i64 @ab_sge_i64(i64 %a, i64 %b) {
> > +; LABEL: @ab_sge_i64
> > +; CHECK: max.s64
> > +  %cmp = icmp sge i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %a, i64 %b
> > +  ret i64 %sel
> > +}
> > +
> > +define i64 @ab_slt_i64(i64 %a, i64 %b) {
> > +; LABEL: @ab_slt_i64
> > +; CHECK: min.s64
> > +  %cmp = icmp slt i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %a, i64 %b
> > +  ret i64 %sel
> > +}
> > +
> > +define i64 @ab_sle_i64(i64 %a, i64 %b) {
> > +; LABEL: @ab_sle_i64
> > +; CHECK: min.s64
> > +  %cmp = icmp sle i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %a, i64 %b
> > +  ret i64 %sel
> > +}
> > +
> > +; *** ba, unsigned, i64
> > +define i64 @ba_ugt_i64(i64 %a, i64 %b) {
> > +; LABEL: @ba_ugt_i64
> > +; CHECK: min.u64
> > +  %cmp = icmp ugt i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %b, i64 %a
> > +  ret i64 %sel
> > +}
> > +
> > +define i64 @ba_uge_i64(i64 %a, i64 %b) {
> > +; LABEL: @ba_uge_i64
> > +; CHECK: min.u64
> > +  %cmp = icmp uge i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %b, i64 %a
> > +  ret i64 %sel
> > +}
> > +
> > +define i64 @ba_ult_i64(i64 %a, i64 %b) {
> > +; LABEL: @ba_ult_i64
> > +; CHECK: max.u64
> > +  %cmp = icmp ult i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %b, i64 %a
> > +  ret i64 %sel
> > +}
> > +
> > +define i64 @ba_ule_i64(i64 %a, i64 %b) {
> > +; LABEL: @ba_ule_i64
> > +; CHECK: max.u64
> > +  %cmp = icmp ule i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %b, i64 %a
> > +  ret i64 %sel
> > +}
> > +
> > +; *** ba, signed, i64
> > +define i64 @ba_sgt_i64(i64 %a, i64 %b) {
> > +; LBAEL: @ba_ugt_i64
> > +; CHECK: min.s64
> > +  %cmp = icmp sgt i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %b, i64 %a
> > +  ret i64 %sel
> > +}
> > +
> > +define i64 @ba_sge_i64(i64 %a, i64 %b) {
> > +; LABEL: @ba_sge_i64
> > +; CHECK: min.s64
> > +  %cmp = icmp sge i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %b, i64 %a
> > +  ret i64 %sel
> > +}
> > +
> > +define i64 @ba_slt_i64(i64 %a, i64 %b) {
> > +; LABEL: @ba_slt_i64
> > +; CHECK: max.s64
> > +  %cmp = icmp slt i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %b, i64 %a
> > +  ret i64 %sel
> > +}
> > +
> > +define i64 @ba_sle_i64(i64 %a, i64 %b) {
> > +; LABEL: @ba_sle_i64
> > +; CHECK: max.s64
> > +  %cmp = icmp sle i64 %a, %b
> > +  %sel = select i1 %cmp, i64 %b, i64 %a
> > +  ret i64 %sel
> > +}
> >
> >
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at lists.llvm.org
> > http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150828/e7569b4e/attachment.html>