[llvm] r246107 - [NVPTX] Let NVPTX backend detect integer min and max patterns.
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 28 08:10:55 PDT 2015
On Wed, Aug 26, 2015 at 11:22:02PM -0000, Bjarke Hammersholt Roune via llvm-commits wrote:
> Author: broune
> Date: Wed Aug 26 18:22:02 2015
> New Revision: 246107
>
> URL: http://llvm.org/viewvc/llvm-project?rev=246107&view=rev
> Log:
> [NVPTX] Let NVPTX backend detect integer min and max patterns.
>
> Summary:
> Let the NVPTX backend detect integer min and max patterns during isel and emit intrinsics that enable hardware support.
>
Now that there are integer min/max SDNodes, the NVPTX backend should mark
those as legal so it can use the generic patterns.
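Roughly, in the NVPTXTargetLowering constructor (an untested sketch on
my part, assuming the generic ISD::SMIN/SMAX/UMIN/UMAX opcodes that
recently landed; matching selection patterns in NVPTXInstrInfo.td would
also be needed):

  // Mark the generic integer min/max nodes as natively supported for
  // i32 and i64, so the generic DAG combiner can form them directly.
  setOperationAction(ISD::SMIN, MVT::i32, Legal);
  setOperationAction(ISD::SMAX, MVT::i32, Legal);
  setOperationAction(ISD::UMIN, MVT::i32, Legal);
  setOperationAction(ISD::UMAX, MVT::i32, Legal);
  setOperationAction(ISD::SMIN, MVT::i64, Legal);
  setOperationAction(ISD::SMAX, MVT::i64, Legal);
  setOperationAction(ISD::UMIN, MVT::i64, Legal);
  setOperationAction(ISD::UMAX, MVT::i64, Legal);

That would let the target-independent combiner do the work that the
target-specific SELECT combine below does by hand.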
-Tom
>
> Reviewers: jholewinski, meheff, jingyue
>
> Subscribers: arsenm, llvm-commits, meheff, jingyue, eliben, jholewinski
>
> Differential Revision: http://reviews.llvm.org/D12377
>
> Added:
> llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll
> Modified:
> llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
>
> Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp?rev=246107&r1=246106&r2=246107&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp Wed Aug 26 18:22:02 2015
> @@ -279,6 +279,7 @@ NVPTXTargetLowering::NVPTXTargetLowering
> setTargetDAGCombine(ISD::FADD);
> setTargetDAGCombine(ISD::MUL);
> setTargetDAGCombine(ISD::SHL);
> + setTargetDAGCombine(ISD::SELECT);
>
> // Now deduce the information based on the above mentioned
> // actions
> @@ -4059,6 +4060,67 @@ static SDValue PerformANDCombine(SDNode
> return SDValue();
> }
>
> +static SDValue PerformSELECTCombine(SDNode *N,
> + TargetLowering::DAGCombinerInfo &DCI) {
> + // Currently this detects patterns for integer min and max and
> + // lowers them to PTX-specific intrinsics that enable hardware
> + // support.
> +
> + const SDValue Cond = N->getOperand(0);
> + if (Cond.getOpcode() != ISD::SETCC) return SDValue();
> +
> + const SDValue LHS = Cond.getOperand(0);
> + const SDValue RHS = Cond.getOperand(1);
> + const SDValue True = N->getOperand(1);
> + const SDValue False = N->getOperand(2);
> + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
> + return SDValue();
> +
> + const EVT VT = N->getValueType(0);
> + if (VT != MVT::i32 && VT != MVT::i64) return SDValue();
> +
> + const ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
> + SDValue Larger; // The larger of LHS and RHS when condition is true.
> + switch (CC) {
> + case ISD::SETULT:
> + case ISD::SETULE:
> + case ISD::SETLT:
> + case ISD::SETLE:
> + Larger = RHS;
> + break;
> +
> + case ISD::SETGT:
> + case ISD::SETGE:
> + case ISD::SETUGT:
> + case ISD::SETUGE:
> + Larger = LHS;
> + break;
> +
> + default:
> + return SDValue();
> + }
> + const bool IsMax = (Larger == True);
> + const bool IsSigned = ISD::isSignedIntSetCC(CC);
> +
> + unsigned IntrinsicId;
> + if (VT == MVT::i32) {
> + if (IsSigned)
> + IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i;
> + else
> + IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui;
> + } else {
> + assert(VT == MVT::i64);
> + if (IsSigned)
> + IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll;
> + else
> + IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull;
> + }
> +
> + SDLoc DL(N);
> + return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
> + DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS);
> +}
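(A note on why the strict and non-strict compares can share switch cases
above: the combine only fires when the select operands are exactly the
setcc operands, so whenever LHS == RHS both arms of the select are the
same value, and e.g. SETULT and SETULE correctly map to the same
nvvm_min_ui intrinsic.)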
> +
> enum OperandSignedness {
> Signed = 0,
> Unsigned,
> @@ -4240,6 +4302,8 @@ SDValue NVPTXTargetLowering::PerformDAGC
> return PerformSHLCombine(N, DCI, OptLevel);
> case ISD::AND:
> return PerformANDCombine(N, DCI);
> + case ISD::SELECT:
> + return PerformSELECTCombine(N, DCI);
> }
> return SDValue();
> }
>
> Added: llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll?rev=246107&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll (added)
> +++ llvm/trunk/test/CodeGen/NVPTX/combine-min-max.ll Wed Aug 26 18:22:02 2015
> @@ -0,0 +1,307 @@
> +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O2 | FileCheck %s
> +
> +; *************************************
> +; * Cases with no min/max
> +
> +define i32 @ab_eq_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ab_eq_i32
> +; CHECK-NOT: min
> +; CHECK-NOT: max
> + %cmp = icmp eq i32 %a, %b
> + %sel = select i1 %cmp, i32 %a, i32 %b
> + ret i32 %sel
> +}
> +
> +define i64 @ba_ne_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ba_ne_i64
> +; CHECK-NOT: min
> +; CHECK-NOT: max
> + %cmp = icmp ne i64 %a, %b
> + %sel = select i1 %cmp, i64 %b, i64 %a
> + ret i64 %sel
> +}
> +
> +; PTX does have e.g. max.s16, but at least as of Kepler (sm_3x) that
> +; gets compiled to SASS that converts the 16-bit parameters to 32-bit
> +; before using a 32-bit instruction. That is probably not a win, and
> +; NVCC 7.5 does not emit 16-bit min/max either, presumably for that
> +; reason.
> +define i16 @ab_ugt_i16(i16 %a, i16 %b) {
> +; CHECK-LABEL: ab_ugt_i16
> +; CHECK-NOT: min
> +; CHECK-NOT: max
> + %cmp = icmp ugt i16 %a, %b
> + %sel = select i1 %cmp, i16 %a, i16 %b
> + ret i16 %sel
> +}
> +
> +
> +; *************************************
> +; * All variations with i32
> +
> +; *** ab, unsigned, i32
> +define i32 @ab_ugt_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ab_ugt_i32
> +; CHECK: max.u32
> + %cmp = icmp ugt i32 %a, %b
> + %sel = select i1 %cmp, i32 %a, i32 %b
> + ret i32 %sel
> +}
> +
> +define i32 @ab_uge_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ab_uge_i32
> +; CHECK: max.u32
> + %cmp = icmp uge i32 %a, %b
> + %sel = select i1 %cmp, i32 %a, i32 %b
> + ret i32 %sel
> +}
> +
> +define i32 @ab_ult_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ab_ult_i32
> +; CHECK: min.u32
> + %cmp = icmp ult i32 %a, %b
> + %sel = select i1 %cmp, i32 %a, i32 %b
> + ret i32 %sel
> +}
> +
> +define i32 @ab_ule_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ab_ule_i32
> +; CHECK: min.u32
> + %cmp = icmp ule i32 %a, %b
> + %sel = select i1 %cmp, i32 %a, i32 %b
> + ret i32 %sel
> +}
> +
> +; *** ab, signed, i32
> +define i32 @ab_sgt_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ab_sgt_i32
> +; CHECK: max.s32
> + %cmp = icmp sgt i32 %a, %b
> + %sel = select i1 %cmp, i32 %a, i32 %b
> + ret i32 %sel
> +}
> +
> +define i32 @ab_sge_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ab_sge_i32
> +; CHECK: max.s32
> + %cmp = icmp sge i32 %a, %b
> + %sel = select i1 %cmp, i32 %a, i32 %b
> + ret i32 %sel
> +}
> +
> +define i32 @ab_slt_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ab_slt_i32
> +; CHECK: min.s32
> + %cmp = icmp slt i32 %a, %b
> + %sel = select i1 %cmp, i32 %a, i32 %b
> + ret i32 %sel
> +}
> +
> +define i32 @ab_sle_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ab_sle_i32
> +; CHECK: min.s32
> + %cmp = icmp sle i32 %a, %b
> + %sel = select i1 %cmp, i32 %a, i32 %b
> + ret i32 %sel
> +}
> +
> +; *** ba, unsigned, i32
> +define i32 @ba_ugt_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ba_ugt_i32
> +; CHECK: min.u32
> + %cmp = icmp ugt i32 %a, %b
> + %sel = select i1 %cmp, i32 %b, i32 %a
> + ret i32 %sel
> +}
> +
> +define i32 @ba_uge_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ba_uge_i32
> +; CHECK: min.u32
> + %cmp = icmp uge i32 %a, %b
> + %sel = select i1 %cmp, i32 %b, i32 %a
> + ret i32 %sel
> +}
> +
> +define i32 @ba_ult_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ba_ult_i32
> +; CHECK: max.u32
> + %cmp = icmp ult i32 %a, %b
> + %sel = select i1 %cmp, i32 %b, i32 %a
> + ret i32 %sel
> +}
> +
> +define i32 @ba_ule_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ba_ule_i32
> +; CHECK: max.u32
> + %cmp = icmp ule i32 %a, %b
> + %sel = select i1 %cmp, i32 %b, i32 %a
> + ret i32 %sel
> +}
> +
> +; *** ba, signed, i32
> +define i32 @ba_sgt_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ba_sgt_i32
> +; CHECK: min.s32
> + %cmp = icmp sgt i32 %a, %b
> + %sel = select i1 %cmp, i32 %b, i32 %a
> + ret i32 %sel
> +}
> +
> +define i32 @ba_sge_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ba_sge_i32
> +; CHECK: min.s32
> + %cmp = icmp sge i32 %a, %b
> + %sel = select i1 %cmp, i32 %b, i32 %a
> + ret i32 %sel
> +}
> +
> +define i32 @ba_slt_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ba_slt_i32
> +; CHECK: max.s32
> + %cmp = icmp slt i32 %a, %b
> + %sel = select i1 %cmp, i32 %b, i32 %a
> + ret i32 %sel
> +}
> +
> +define i32 @ba_sle_i32(i32 %a, i32 %b) {
> +; CHECK-LABEL: ba_sle_i32
> +; CHECK: max.s32
> + %cmp = icmp sle i32 %a, %b
> + %sel = select i1 %cmp, i32 %b, i32 %a
> + ret i32 %sel
> +}
> +
> +; *************************************
> +; * All variations with i64
> +
> +; *** ab, unsigned, i64
> +define i64 @ab_ugt_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ab_ugt_i64
> +; CHECK: max.u64
> + %cmp = icmp ugt i64 %a, %b
> + %sel = select i1 %cmp, i64 %a, i64 %b
> + ret i64 %sel
> +}
> +
> +define i64 @ab_uge_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ab_uge_i64
> +; CHECK: max.u64
> + %cmp = icmp uge i64 %a, %b
> + %sel = select i1 %cmp, i64 %a, i64 %b
> + ret i64 %sel
> +}
> +
> +define i64 @ab_ult_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ab_ult_i64
> +; CHECK: min.u64
> + %cmp = icmp ult i64 %a, %b
> + %sel = select i1 %cmp, i64 %a, i64 %b
> + ret i64 %sel
> +}
> +
> +define i64 @ab_ule_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ab_ule_i64
> +; CHECK: min.u64
> + %cmp = icmp ule i64 %a, %b
> + %sel = select i1 %cmp, i64 %a, i64 %b
> + ret i64 %sel
> +}
> +
> +; *** ab, signed, i64
> +define i64 @ab_sgt_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ab_sgt_i64
> +; CHECK: max.s64
> + %cmp = icmp sgt i64 %a, %b
> + %sel = select i1 %cmp, i64 %a, i64 %b
> + ret i64 %sel
> +}
> +
> +define i64 @ab_sge_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ab_sge_i64
> +; CHECK: max.s64
> + %cmp = icmp sge i64 %a, %b
> + %sel = select i1 %cmp, i64 %a, i64 %b
> + ret i64 %sel
> +}
> +
> +define i64 @ab_slt_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ab_slt_i64
> +; CHECK: min.s64
> + %cmp = icmp slt i64 %a, %b
> + %sel = select i1 %cmp, i64 %a, i64 %b
> + ret i64 %sel
> +}
> +
> +define i64 @ab_sle_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ab_sle_i64
> +; CHECK: min.s64
> + %cmp = icmp sle i64 %a, %b
> + %sel = select i1 %cmp, i64 %a, i64 %b
> + ret i64 %sel
> +}
> +
> +; *** ba, unsigned, i64
> +define i64 @ba_ugt_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ba_ugt_i64
> +; CHECK: min.u64
> + %cmp = icmp ugt i64 %a, %b
> + %sel = select i1 %cmp, i64 %b, i64 %a
> + ret i64 %sel
> +}
> +
> +define i64 @ba_uge_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ba_uge_i64
> +; CHECK: min.u64
> + %cmp = icmp uge i64 %a, %b
> + %sel = select i1 %cmp, i64 %b, i64 %a
> + ret i64 %sel
> +}
> +
> +define i64 @ba_ult_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ba_ult_i64
> +; CHECK: max.u64
> + %cmp = icmp ult i64 %a, %b
> + %sel = select i1 %cmp, i64 %b, i64 %a
> + ret i64 %sel
> +}
> +
> +define i64 @ba_ule_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ba_ule_i64
> +; CHECK: max.u64
> + %cmp = icmp ule i64 %a, %b
> + %sel = select i1 %cmp, i64 %b, i64 %a
> + ret i64 %sel
> +}
> +
> +; *** ba, signed, i64
> +define i64 @ba_sgt_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ba_sgt_i64
> +; CHECK: min.s64
> + %cmp = icmp sgt i64 %a, %b
> + %sel = select i1 %cmp, i64 %b, i64 %a
> + ret i64 %sel
> +}
> +
> +define i64 @ba_sge_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ba_sge_i64
> +; CHECK: min.s64
> + %cmp = icmp sge i64 %a, %b
> + %sel = select i1 %cmp, i64 %b, i64 %a
> + ret i64 %sel
> +}
> +
> +define i64 @ba_slt_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ba_slt_i64
> +; CHECK: max.s64
> + %cmp = icmp slt i64 %a, %b
> + %sel = select i1 %cmp, i64 %b, i64 %a
> + ret i64 %sel
> +}
> +
> +define i64 @ba_sle_i64(i64 %a, i64 %b) {
> +; CHECK-LABEL: ba_sle_i64
> +; CHECK: max.s64
> + %cmp = icmp sle i64 %a, %b
> + %sel = select i1 %cmp, i64 %b, i64 %a
> + ret i64 %sel
> +}