[llvm] r227123 - [NVPTX] Generate a more optimal sequence for select of i1
Justin Holewinski
jholewinski at nvidia.com
Mon Jan 26 11:52:21 PST 2015
Author: jholewinski
Date: Mon Jan 26 13:52:20 2015
New Revision: 227123
URL: http://llvm.org/viewvc/llvm-project?rev=227123&view=rev
Log:
[NVPTX] Generate a more optimal sequence for select of i1
Instead of creating a pattern like "(p && a) || ((!p) && b)",
just expand the i8 operands to i32 and perform the selp on them.
Fixes PR22246
Added:
llvm/trunk/test/CodeGen/NVPTX/bug22246.ll
Modified:
llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h
llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp?rev=227123&r1=227122&r2=227123&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp Mon Jan 26 13:52:20 2015
@@ -259,6 +259,9 @@ NVPTXTargetLowering::NVPTXTargetLowering
setOperationAction(ISD::CTPOP, MVT::i32, Legal);
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+ // PTX does not directly support SELP of i1, so promote to i32 first
+ setOperationAction(ISD::SELECT, MVT::i1, Custom);
+
// We have some custom DAG combine patterns for these nodes
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::AND);
@@ -1803,11 +1806,29 @@ NVPTXTargetLowering::LowerOperation(SDVa
case ISD::SRA_PARTS:
case ISD::SRL_PARTS:
return LowerShiftRightParts(Op, DAG);
+ case ISD::SELECT:
+ return LowerSelect(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
}
+SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Op0 = Op->getOperand(0);
+ SDValue Op1 = Op->getOperand(1);
+ SDValue Op2 = Op->getOperand(2);
+ SDLoc DL(Op.getNode());
+
+ assert(Op.getValueType() == MVT::i1 && "Custom lowering enabled only for i1");
+
+ Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
+ Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
+ SDValue Select = DAG.getNode(ISD::SELECT, DL, MVT::i32, Op0, Op1, Op2);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Select);
+
+ return Trunc;
+}
+
SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::i1)
return LowerLOADi1(Op, DAG);
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h?rev=227123&r1=227122&r2=227123&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h Mon Jan 26 13:52:20 2015
@@ -529,6 +529,8 @@ private:
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
+
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td?rev=227123&r1=227122&r2=227123&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td Mon Jan 26 13:52:20 2015
@@ -1356,11 +1356,6 @@ defm SELP_u64 : SELP<"u64", Int64Regs, i
defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;
-// Special select for predicate operands
-def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
- (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
- (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
-
//
// Funnnel shift in clamp mode
//
Added: llvm/trunk/test/CodeGen/NVPTX/bug22246.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/bug22246.ll?rev=227123&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/bug22246.ll (added)
+++ llvm/trunk/test/CodeGen/NVPTX/bug22246.ll Mon Jan 26 13:52:20 2015
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; CHECK-LABEL: _Z3foobbbPb
+define void @_Z3foobbbPb(i1 zeroext %p1, i1 zeroext %p2, i1 zeroext %p3, i8* nocapture %output) {
+entry:
+; CHECK: selp.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}}
+ %.sink.v = select i1 %p1, i1 %p2, i1 %p3
+ %frombool5 = zext i1 %.sink.v to i8
+ store i8 %frombool5, i8* %output, align 1
+ ret void
+}
More information about the llvm-commits
mailing list