[llvm-commits] [llvm] r167948 - in /llvm/trunk: lib/Target/NVPTX/NVPTXISelLowering.cpp lib/Target/NVPTX/NVPTXISelLowering.h test/CodeGen/NVPTX/pr13291-i1-store.ll
Justin Holewinski
justin.holewinski at gmail.com
Wed Nov 14 11:46:22 PST 2012
As this is a bug fix, can we get this merged into the 3.2 branch?
On Wed, Nov 14, 2012 at 2:19 PM, Justin Holewinski
<jholewinski at nvidia.com> wrote:
> Author: jholewinski
> Date: Wed Nov 14 13:19:16 2012
> New Revision: 167948
>
> URL: http://llvm.org/viewvc/llvm-project?rev=167948&view=rev
> Log:
> [NVPTX] Implement custom lowering of loads/stores for i1
>
> Loads from i1 become loads from i8 followed by trunc
> Stores to i1 become zext to i8 followed by store to i8
>
> Fixes PR13291
>
> Added:
> llvm/trunk/test/CodeGen/NVPTX/pr13291-i1-store.ll
> Modified:
> llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
> llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h
>
> Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp?rev=167948&r1=167947&r2=167948&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.cpp Wed Nov 14 13:19:16 2012
> @@ -174,10 +174,11 @@
> setTruncStoreAction(MVT::f64, MVT::f32, Expand);
>
> // PTX does not support load / store predicate registers
> - setOperationAction(ISD::LOAD, MVT::i1, Expand);
> + setOperationAction(ISD::LOAD, MVT::i1, Custom);
> + setOperationAction(ISD::STORE, MVT::i1, Custom);
> +
> setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
> setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
> - setOperationAction(ISD::STORE, MVT::i1, Expand);
> setTruncStoreAction(MVT::i64, MVT::i1, Expand);
> setTruncStoreAction(MVT::i32, MVT::i1, Expand);
> setTruncStoreAction(MVT::i16, MVT::i1, Expand);
> @@ -856,11 +857,66 @@
> case ISD::EXTRACT_SUBVECTOR:
> return Op;
> case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
> + case ISD::STORE: return LowerSTORE(Op, DAG);
> + case ISD::LOAD: return LowerLOAD(Op, DAG);
> default:
> llvm_unreachable("Custom lowering not defined for operation");
> }
> }
>
> +
> +// v = ld i1* addr
> +// =>
> +// v1 = ld i8* addr
> +// v = trunc v1 to i1
> +SDValue NVPTXTargetLowering::
> +LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
> + SDNode *Node = Op.getNode();
> + LoadSDNode *LD = cast<LoadSDNode>(Node);
> + DebugLoc dl = Node->getDebugLoc();
> + ISD::LoadExtType ExtType = LD->getExtensionType();
> + assert(ExtType == ISD::NON_EXTLOAD) ;
> + EVT VT = Node->getValueType(0);
> + assert(VT == MVT::i1 && "Custom lowering for i1 load only");
> + SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
> + LD->getPointerInfo(),
> + LD->isVolatile(), LD->isNonTemporal(),
> + LD->isInvariant(),
> + LD->getAlignment());
> + SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
> + // The legalizer (the caller) is expecting two values from the legalized
> + // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
> + // in LegalizeDAG.cpp which also uses MergeValues.
> + SDValue Ops[] = {result, LD->getChain()};
> + return DAG.getMergeValues(Ops, 2, dl);
> +}
> +
> +// st i1 v, addr
> +// =>
> +// v1 = zext v to i8
> +// st i8, addr
> +SDValue NVPTXTargetLowering::
> +LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
> + SDNode *Node = Op.getNode();
> + DebugLoc dl = Node->getDebugLoc();
> + StoreSDNode *ST = cast<StoreSDNode>(Node);
> + SDValue Tmp1 = ST->getChain();
> + SDValue Tmp2 = ST->getBasePtr();
> + SDValue Tmp3 = ST->getValue();
> + EVT VT = Tmp3.getValueType();
> + assert(VT == MVT::i1 && "Custom lowering for i1 store only");
> + unsigned Alignment = ST->getAlignment();
> + bool isVolatile = ST->isVolatile();
> + bool isNonTemporal = ST->isNonTemporal();
> + Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl,
> + MVT::i8, Tmp3);
> + SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
> + ST->getPointerInfo(), isVolatile,
> + isNonTemporal, Alignment);
> + return Result;
> +}
> +
> +
> SDValue
> NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
> int idx,
> EVT v) const {
>
> Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h?rev=167948&r1=167947&r2=167948&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h (original)
> +++ llvm/trunk/lib/Target/NVPTX/NVPTXISelLowering.h Wed Nov 14 13:19:16 2012
> @@ -138,6 +138,9 @@
> SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
>
> SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
> +
> + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
> + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
> };
> } // namespace llvm
>
>
> Added: llvm/trunk/test/CodeGen/NVPTX/pr13291-i1-store.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/pr13291-i1-store.ll?rev=167948&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/NVPTX/pr13291-i1-store.ll (added)
> +++ llvm/trunk/test/CodeGen/NVPTX/pr13291-i1-store.ll Wed Nov 14 13:19:16 2012
> @@ -0,0 +1,26 @@
> +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32
> +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64
> +
> +define ptx_kernel void @t1(i1* %a) {
> +; PTX32: mov.u16 %rc{{[0-9]+}}, 0;
> +; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rc{{[0-9]+}};
> +; PTX64: mov.u16 %rc{{[0-9]+}}, 0;
> +; PTX64-NEXT: st.u8 [%rl{{[0-9]+}}], %rc{{[0-9]+}};
> + store i1 false, i1* %a
> + ret void
> +}
> +
> +
> +define ptx_kernel void @t2(i1* %a, i8* %b) {
> +; PTX32: ld.u8 %rc{{[0-9]+}}, [%r{{[0-9]+}}]
> +; PTX32: and.b16 temp, %rc{{[0-9]+}}, 1;
> +; PTX32: setp.b16.eq %p{{[0-9]+}}, temp, 1;
> +; PTX64: ld.u8 %rc{{[0-9]+}}, [%rl{{[0-9]+}}]
> +; PTX64: and.b16 temp, %rc{{[0-9]+}}, 1;
> +; PTX64: setp.b16.eq %p{{[0-9]+}}, temp, 1;
> +
> + %t1 = load i1* %a
> + %t2 = select i1 %t1, i8 1, i8 2
> + store i8 %t2, i8* %b
> + ret void
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
--
Thanks,
Justin Holewinski
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20121114/85144b64/attachment.html>
More information about the llvm-commits
mailing list