[llvm] r246107 - [NVPTX] Let NVPTX backend detect integer min and max patterns.

Tom Stellard via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 28 08:10:55 PDT 2015


On Wed, Aug 26, 2015 at 11:22:02PM -0000, Bjarke Hammersholt Roune via llvm-commits wrote:
> Author: broune
> Date: Wed Aug 26 18:22:02 2015
> New Revision: 246107
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=246107&view=rev
> Log:
> [NVPTX] Let NVPTX backend detect integer min and max patterns.
> 
> Summary:
> Let NVPTX backend detect integer min and max patterns during isel and emit intrinsics that enable hardware support.
> 

Now that there are generic integer min/max SDNodes (ISD::SMIN, ISD::SMAX,
ISD::UMIN, ISD::UMAX), the NVPTX backend should mark them as legal so that it
can use the generic patterns.

-Tom

> 
> Reviewers: jholewinski, meheff, jingyue
> 
> Subscribers: arsenm, llvm-commits, meheff, jingyue, eliben, jholewinski
> 
> Differential Revision: http://reviews.llvm.org/D12377
> 
> Added:
>     llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll
> Modified:
>     llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
> 
> Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp?rev=246107&r1=246106&r2=246107&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp Wed Aug 26 18:22:02 2015
> @@ -279,6 +279,7 @@ NVPTXTargetLowering::NVPTXTargetLowering
>    setTargetDAGCombine(ISD::FADD);
>    setTargetDAGCombine(ISD::MUL);
>    setTargetDAGCombine(ISD::SHL);
> +  setTargetDAGCombine(ISD::SELECT);
>  
>    // Now deduce the information based on the above mentioned
>    // actions
> @@ -4059,6 +4060,67 @@ static SDValue PerformANDCombine(SDNode
>    return SDValue();
>  }
>  
> +static SDValue PerformSELECTCombine(SDNode *N,
> +                                    TargetLowering::DAGCombinerInfo &DCI) {
> +  // Currently this detects patterns for integer min and max and
> +  // lowers them to PTX-specific intrinsics that enable hardware
> +  // support.
> +
> +  const SDValue Cond = N->getOperand(0);
> +  if (Cond.getOpcode() != ISD::SETCC) return SDValue();
> +
> +  const SDValue LHS = Cond.getOperand(0);
> +  const SDValue RHS = Cond.getOperand(1);
> +  const SDValue True = N->getOperand(1);
> +  const SDValue False = N->getOperand(2);
> +  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
> +    return SDValue();
> +
> +  const EVT VT = N->getValueType(0);
> +  if (VT != MVT::i32 && VT != MVT::i64) return SDValue();
> +
> +  const ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
> +  SDValue Larger;  // The larger of LHS and RHS when condition is true.
> +  switch (CC) {
> +    case ISD::SETULT:
> +    case ISD::SETULE:
> +    case ISD::SETLT:
> +    case ISD::SETLE:
> +      Larger = RHS;
> +      break;
> +
> +    case ISD::SETGT:
> +    case ISD::SETGE:
> +    case ISD::SETUGT:
> +    case ISD::SETUGE:
> +      Larger = LHS;
> +      break;
> +
> +    default:
> +      return SDValue();
> +  }
> +  const bool IsMax = (Larger == True);
> +  const bool IsSigned = ISD::isSignedIntSetCC(CC);
> +
> +  unsigned IntrinsicId;
> +  if (VT == MVT::i32) {
> +    if (IsSigned)
> +      IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i;
> +    else
> +      IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui;
> +  } else {
> +    assert(VT == MVT::i64);
> +    if (IsSigned)
> +      IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll;
> +    else
> +      IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull;
> +  }
> +
> +  SDLoc DL(N);
> +  return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
> +                         DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS);
> +}
> +
>  enum OperandSignedness {
>    Signed = 0,
>    Unsigned,
> @@ -4240,6 +4302,8 @@ SDValue NVPTXTargetLowering::PerformDAGC
>        return PerformSHLCombine(N, DCI, OptLevel);
>      case ISD::AND:
>        return PerformANDCombine(N, DCI);
> +    case ISD::SELECT:
> +      return PerformSELECTCombine(N, DCI);
>    }
>    return SDValue();
>  }
> 
> Added: llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll?rev=246107&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll (added)
> +++ llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll Wed Aug 26 18:22:02 2015
> @@ -0,0 +1,307 @@
> +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O2 | FileCheck %s
> +
> +; *************************************
> +; * Cases with no min/max
> +
> +define i32 @ab_eq_i32(i32 %a, i32 %b) {
> +; LABEL: @ab_eq_i32
> +; CHECK-NOT: min
> +; CHECK-NOT: max
> +  %cmp = icmp eq i32 %a, %b
> +  %sel = select i1 %cmp, i32 %a, i32 %b
> +  ret i32 %sel
> +}
> +
> +define i64 @ba_ne_i64(i64 %a, i64 %b) {
> +; LABEL: @ba_ne_i64
> +; CHECK-NOT: min
> +; CHECK-NOT: max
> +  %cmp = icmp ne i64 %a, %b
> +  %sel = select i1 %cmp, i64 %b, i64 %a
> +  ret i64 %sel
> +}
> +
> +; PTX does have e.g. max.s16, but at least as of Kepler (sm_3x) that
> +; gets compiled to SASS that converts the 16 bit parameters to 32 bit
> +; before using a 32 bit instruction. That is probably not a win and
> +; NVCC 7.5 does not emit 16 bit min/max either, presumably for that
> +; reason.
> +define i16 @ab_ugt_i16(i16 %a, i16 %b) {
> +; LABEL: @ab_ugt_i16
> +; CHECK-NOT: min
> +; CHECK-NOT: max
> +  %cmp = icmp ugt i16 %a, %b
> +  %sel = select i1 %cmp, i16 %a, i16 %b
> +  ret i16 %sel
> +}
> +
> +
> +; *************************************
> +; * All variations with i32
> +
> +; *** ab, unsigned, i32
> +define i32 @ab_ugt_i32(i32 %a, i32 %b) {
> +; LABEL: @ab_ugt_i32
> +; CHECK: max.u32
> +  %cmp = icmp ugt i32 %a, %b
> +  %sel = select i1 %cmp, i32 %a, i32 %b
> +  ret i32 %sel
> +}
> +
> +define i32 @ab_uge_i32(i32 %a, i32 %b) {
> +; LABEL: @ab_uge_i32
> +; CHECK: max.u32
> +  %cmp = icmp uge i32 %a, %b
> +  %sel = select i1 %cmp, i32 %a, i32 %b
> +  ret i32 %sel
> +}
> +
> +define i32 @ab_ult_i32(i32 %a, i32 %b) {
> +; LABEL: @ab_ult_i32
> +; CHECK: min.u32
> +  %cmp = icmp ult i32 %a, %b
> +  %sel = select i1 %cmp, i32 %a, i32 %b
> +  ret i32 %sel
> +}
> +
> +define i32 @ab_ule_i32(i32 %a, i32 %b) {
> +; LABEL: @ab_ule_i32
> +; CHECK: min.u32
> +  %cmp = icmp ule i32 %a, %b
> +  %sel = select i1 %cmp, i32 %a, i32 %b
> +  ret i32 %sel
> +}
> +
> +; *** ab, signed, i32
> +define i32 @ab_sgt_i32(i32 %a, i32 %b) {
> +; LABEL: @ab_sgt_i32
> +; CHECK: max.s32
> +  %cmp = icmp sgt i32 %a, %b
> +  %sel = select i1 %cmp, i32 %a, i32 %b
> +  ret i32 %sel
> +}
> +
> +define i32 @ab_sge_i32(i32 %a, i32 %b) {
> +; LABEL: @ab_sge_i32
> +; CHECK: max.s32
> +  %cmp = icmp sge i32 %a, %b
> +  %sel = select i1 %cmp, i32 %a, i32 %b
> +  ret i32 %sel
> +}
> +
> +define i32 @ab_slt_i32(i32 %a, i32 %b) {
> +; LABEL: @ab_slt_i32
> +; CHECK: min.s32
> +  %cmp = icmp slt i32 %a, %b
> +  %sel = select i1 %cmp, i32 %a, i32 %b
> +  ret i32 %sel
> +}
> +
> +define i32 @ab_sle_i32(i32 %a, i32 %b) {
> +; LABEL: @ab_sle_i32
> +; CHECK: min.s32
> +  %cmp = icmp sle i32 %a, %b
> +  %sel = select i1 %cmp, i32 %a, i32 %b
> +  ret i32 %sel
> +}
> +
> +; *** ba, unsigned, i32
> +define i32 @ba_ugt_i32(i32 %a, i32 %b) {
> +; LABEL: @ba_ugt_i32
> +; CHECK: min.u32
> +  %cmp = icmp ugt i32 %a, %b
> +  %sel = select i1 %cmp, i32 %b, i32 %a
> +  ret i32 %sel
> +}
> +
> +define i32 @ba_uge_i32(i32 %a, i32 %b) {
> +; LABEL: @ba_uge_i32
> +; CHECK: min.u32
> +  %cmp = icmp uge i32 %a, %b
> +  %sel = select i1 %cmp, i32 %b, i32 %a
> +  ret i32 %sel
> +}
> +
> +define i32 @ba_ult_i32(i32 %a, i32 %b) {
> +; LABEL: @ba_ult_i32
> +; CHECK: max.u32
> +  %cmp = icmp ult i32 %a, %b
> +  %sel = select i1 %cmp, i32 %b, i32 %a
> +  ret i32 %sel
> +}
> +
> +define i32 @ba_ule_i32(i32 %a, i32 %b) {
> +; LABEL: @ba_ule_i32
> +; CHECK: max.u32
> +  %cmp = icmp ule i32 %a, %b
> +  %sel = select i1 %cmp, i32 %b, i32 %a
> +  ret i32 %sel
> +}
> +
> +; *** ba, signed, i32
> +define i32 @ba_sgt_i32(i32 %a, i32 %b) {
> +; LABEL: @ba_sgt_i32
> +; CHECK: min.s32
> +  %cmp = icmp sgt i32 %a, %b
> +  %sel = select i1 %cmp, i32 %b, i32 %a
> +  ret i32 %sel
> +}
> +
> +define i32 @ba_sge_i32(i32 %a, i32 %b) {
> +; LABEL: @ba_sge_i32
> +; CHECK: min.s32
> +  %cmp = icmp sge i32 %a, %b
> +  %sel = select i1 %cmp, i32 %b, i32 %a
> +  ret i32 %sel
> +}
> +
> +define i32 @ba_slt_i32(i32 %a, i32 %b) {
> +; LABEL: @ba_slt_i32
> +; CHECK: max.s32
> +  %cmp = icmp slt i32 %a, %b
> +  %sel = select i1 %cmp, i32 %b, i32 %a
> +  ret i32 %sel
> +}
> +
> +define i32 @ba_sle_i32(i32 %a, i32 %b) {
> +; LABEL: @ba_sle_i32
> +; CHECK: max.s32
> +  %cmp = icmp sle i32 %a, %b
> +  %sel = select i1 %cmp, i32 %b, i32 %a
> +  ret i32 %sel
> +}
> +
> +; *************************************
> +; * All variations with i64
> +
> +; *** ab, unsigned, i64
> +define i64 @ab_ugt_i64(i64 %a, i64 %b) {
> +; LABEL: @ab_ugt_i64
> +; CHECK: max.u64
> +  %cmp = icmp ugt i64 %a, %b
> +  %sel = select i1 %cmp, i64 %a, i64 %b
> +  ret i64 %sel
> +}
> +
> +define i64 @ab_uge_i64(i64 %a, i64 %b) {
> +; LABEL: @ab_uge_i64
> +; CHECK: max.u64
> +  %cmp = icmp uge i64 %a, %b
> +  %sel = select i1 %cmp, i64 %a, i64 %b
> +  ret i64 %sel
> +}
> +
> +define i64 @ab_ult_i64(i64 %a, i64 %b) {
> +; LABEL: @ab_ult_i64
> +; CHECK: min.u64
> +  %cmp = icmp ult i64 %a, %b
> +  %sel = select i1 %cmp, i64 %a, i64 %b
> +  ret i64 %sel
> +}
> +
> +define i64 @ab_ule_i64(i64 %a, i64 %b) {
> +; LABEL: @ab_ule_i64
> +; CHECK: min.u64
> +  %cmp = icmp ule i64 %a, %b
> +  %sel = select i1 %cmp, i64 %a, i64 %b
> +  ret i64 %sel
> +}
> +
> +; *** ab, signed, i64
> +define i64 @ab_sgt_i64(i64 %a, i64 %b) {
> +; LABEL: @ab_sgt_i64
> +; CHECK: max.s64
> +  %cmp = icmp sgt i64 %a, %b
> +  %sel = select i1 %cmp, i64 %a, i64 %b
> +  ret i64 %sel
> +}
> +
> +define i64 @ab_sge_i64(i64 %a, i64 %b) {
> +; LABEL: @ab_sge_i64
> +; CHECK: max.s64
> +  %cmp = icmp sge i64 %a, %b
> +  %sel = select i1 %cmp, i64 %a, i64 %b
> +  ret i64 %sel
> +}
> +
> +define i64 @ab_slt_i64(i64 %a, i64 %b) {
> +; LABEL: @ab_slt_i64
> +; CHECK: min.s64
> +  %cmp = icmp slt i64 %a, %b
> +  %sel = select i1 %cmp, i64 %a, i64 %b
> +  ret i64 %sel
> +}
> +
> +define i64 @ab_sle_i64(i64 %a, i64 %b) {
> +; LABEL: @ab_sle_i64
> +; CHECK: min.s64
> +  %cmp = icmp sle i64 %a, %b
> +  %sel = select i1 %cmp, i64 %a, i64 %b
> +  ret i64 %sel
> +}
> +
> +; *** ba, unsigned, i64
> +define i64 @ba_ugt_i64(i64 %a, i64 %b) {
> +; LABEL: @ba_ugt_i64
> +; CHECK: min.u64
> +  %cmp = icmp ugt i64 %a, %b
> +  %sel = select i1 %cmp, i64 %b, i64 %a
> +  ret i64 %sel
> +}
> +
> +define i64 @ba_uge_i64(i64 %a, i64 %b) {
> +; LABEL: @ba_uge_i64
> +; CHECK: min.u64
> +  %cmp = icmp uge i64 %a, %b
> +  %sel = select i1 %cmp, i64 %b, i64 %a
> +  ret i64 %sel
> +}
> +
> +define i64 @ba_ult_i64(i64 %a, i64 %b) {
> +; LABEL: @ba_ult_i64
> +; CHECK: max.u64
> +  %cmp = icmp ult i64 %a, %b
> +  %sel = select i1 %cmp, i64 %b, i64 %a
> +  ret i64 %sel
> +}
> +
> +define i64 @ba_ule_i64(i64 %a, i64 %b) {
> +; LABEL: @ba_ule_i64
> +; CHECK: max.u64
> +  %cmp = icmp ule i64 %a, %b
> +  %sel = select i1 %cmp, i64 %b, i64 %a
> +  ret i64 %sel
> +}
> +
> +; *** ba, signed, i64
> +define i64 @ba_sgt_i64(i64 %a, i64 %b) {
> +; LABEL: @ba_sgt_i64
> +; CHECK: min.s64
> +  %cmp = icmp sgt i64 %a, %b
> +  %sel = select i1 %cmp, i64 %b, i64 %a
> +  ret i64 %sel
> +}
> +
> +define i64 @ba_sge_i64(i64 %a, i64 %b) {
> +; LABEL: @ba_sge_i64
> +; CHECK: min.s64
> +  %cmp = icmp sge i64 %a, %b
> +  %sel = select i1 %cmp, i64 %b, i64 %a
> +  ret i64 %sel
> +}
> +
> +define i64 @ba_slt_i64(i64 %a, i64 %b) {
> +; LABEL: @ba_slt_i64
> +; CHECK: max.s64
> +  %cmp = icmp slt i64 %a, %b
> +  %sel = select i1 %cmp, i64 %b, i64 %a
> +  ret i64 %sel
> +}
> +
> +define i64 @ba_sle_i64(i64 %a, i64 %b) {
> +; LABEL: @ba_sle_i64
> +; CHECK: max.s64
> +  %cmp = icmp sle i64 %a, %b
> +  %sel = select i1 %cmp, i64 %b, i64 %a
> +  ret i64 %sel
> +}
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list