[llvm] r268272 - [NVPTX] Fix sign/zero-extending ldg/ldu instruction selection
Justin Holewinski via llvm-commits
llvm-commits at lists.llvm.org
Mon May 2 11:12:02 PDT 2016
Author: jholewinski
Date: Mon May 2 13:12:02 2016
New Revision: 268272
URL: http://llvm.org/viewvc/llvm-project?rev=268272&view=rev
Log:
[NVPTX] Fix sign/zero-extending ldg/ldu instruction selection
Summary:
We don't have sign-/zero-extending ldg/ldu instructions defined,
so we need to emulate them with explicit CVTs. We were originally
handling the i8 case, but not any other cases.
Fixes PR26185
Reviewers: jingyue, jlebar
Subscribers: jholewinski
Differential Revision: http://reviews.llvm.org/D19615
Added:
llvm/trunk/test/CodeGen/NVPTX/bug26185-2.ll
Modified:
llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp?rev=268272&r1=268271&r2=268272&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp Mon May 2 13:12:02 2016
@@ -2062,61 +2062,33 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(
//
// i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
//
- // Since we load an i8 value, the matching logic above will have selected an
- // LDG instruction that reads i8 and stores it in an i16 register (NVPTX does
- // not expose 8-bit registers):
- //
- // i16,ch = INT_PTX_LDG_GLOBAL_i8areg64 t7, t0
- //
- // To get the correct type in this case, truncate back to i8 and then extend
- // to the original load type.
+ // Here, the matching logic above will select a load for the original memory
+ // type (i8), so its result type will not match the type the node must return
+ // (i32). Our LDG/LDU nodes do not support the concept of
+ // sign-/zero-extension, so emulate it here by adding an explicit CVT
+ // instruction. Ptxas should clean up any redundancies here.
+
EVT OrigType = N->getValueType(0);
- LoadSDNode *LDSD = dyn_cast<LoadSDNode>(N);
- if (LDSD && EltVT == MVT::i8 && OrigType.getScalarSizeInBits() >= 32) {
- unsigned CvtOpc = 0;
+ LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
- switch (LDSD->getExtensionType()) {
- default:
- llvm_unreachable("An extension is required for i8 loads");
- break;
- case ISD::SEXTLOAD:
- switch (OrigType.getSimpleVT().SimpleTy) {
- default:
- llvm_unreachable("Unhandled integer load type");
- break;
- case MVT::i32:
- CvtOpc = NVPTX::CVT_s32_s8;
- break;
- case MVT::i64:
- CvtOpc = NVPTX::CVT_s64_s8;
- break;
- }
- break;
- case ISD::EXTLOAD:
- case ISD::ZEXTLOAD:
- switch (OrigType.getSimpleVT().SimpleTy) {
- default:
- llvm_unreachable("Unhandled integer load type");
- break;
- case MVT::i32:
- CvtOpc = NVPTX::CVT_u32_u8;
- break;
- case MVT::i64:
- CvtOpc = NVPTX::CVT_u64_u8;
- break;
- }
- break;
- }
+ if (OrigType != EltVT && LdNode) {
+ // We have an extending load. The instruction we selected operates on the
+ // smaller type, but the SDNode we are replacing has the larger type. We
+ // need to emit a CVT to make the types match.
+ bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
+ unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
+ EltVT.getSimpleVT(), IsSigned);
- // For each output value, truncate to i8 (since the upper 8 bits are
- // undefined) and then extend to the desired type.
+ // For each output value, apply the manual sign/zero-extension and make sure
+ // all users of the load go through that CVT.
for (unsigned i = 0; i != NumElts; ++i) {
SDValue Res(LD, i);
SDValue OrigVal(N, i);
SDNode *CvtNode =
CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
- CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32));
+ CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
+ DL, MVT::i32));
ReplaceUses(OrigVal, SDValue(CvtNode, 0));
}
}
@@ -5199,3 +5171,57 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmM
}
return true;
}
+
+/// GetConvertOpcode - Returns the CVT_ instruction opcode that converts the
+/// integer \p SrcTy to \p DestTy; \p IsSigned selects the s* over u* opcodes.
+unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
+ bool IsSigned) {
+ switch (SrcTy.SimpleTy) {
+ default:
+ llvm_unreachable("Unhandled source type");
+ case MVT::i8:
+ switch (DestTy.SimpleTy) {
+ default:
+ llvm_unreachable("Unhandled dest type");
+ case MVT::i16:
+ return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
+ case MVT::i32:
+ return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
+ case MVT::i64:
+ return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
+ }
+ case MVT::i16:
+ switch (DestTy.SimpleTy) {
+ default:
+ llvm_unreachable("Unhandled dest type");
+ case MVT::i8:
+ return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
+ case MVT::i32:
+ return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
+ case MVT::i64:
+ return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
+ }
+ case MVT::i32:
+ switch (DestTy.SimpleTy) {
+ default:
+ llvm_unreachable("Unhandled dest type");
+ case MVT::i8:
+ return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
+ case MVT::i16:
+ return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
+ case MVT::i64:
+ return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
+ }
+ case MVT::i64:
+ switch (DestTy.SimpleTy) {
+ default:
+ llvm_unreachable("Unhandled dest type");
+ case MVT::i8:
+ return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
+ case MVT::i16:
+ return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
+ case MVT::i32:
+ return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
+ }
+ }
+}
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h?rev=268272&r1=268271&r2=268272&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.h Mon May 2 13:12:02 2016
@@ -93,6 +93,7 @@ private:
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
+ static unsigned GetConvertOpcode(MVT DestTy, MVT SrcTy, bool IsSigned);
};
} // end namespace llvm
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td?rev=268272&r1=268271&r2=268272&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td Mon May 2 13:12:02 2016
@@ -377,6 +377,8 @@ let hasSideEffects = 0 in {
}
// Generate cvts from all types to all types.
+ defm CVT_s8 : CVT_FROM_ALL<"s8", Int16Regs>;
+ defm CVT_u8 : CVT_FROM_ALL<"u8", Int16Regs>;
defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
Added: llvm/trunk/test/CodeGen/NVPTX/bug26185-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/bug26185-2.ll?rev=268272&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/bug26185-2.ll (added)
+++ llvm/trunk/test/CodeGen/NVPTX/bug26185-2.ll Mon May 2 13:12:02 2016
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s
+
+; Verify that we correctly emit code for extending ldg/ldu. We do not expose
+; extending variants in the backend, but the ldg/ldu selection code may pick
+; extending loads as candidates. We do want to support this, so make sure we
+; emit the necessary cvt.* instructions to implement the extension and let ptxas
+; emit the real extending loads.
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; CHECK-LABEL: spam
+define ptx_kernel void @spam(i8 addrspace(1)* noalias nocapture readonly %arg, i8 addrspace(1)* noalias nocapture %arg1, i64 %arg2, i64 %arg3) #0 {
+bb:
+ %tmp = bitcast i8 addrspace(1)* %arg to i16 addrspace(1)*
+ %tmp4 = bitcast i8 addrspace(1)* %arg1 to i64 addrspace(1)*
+ %tmp5 = add nsw i64 %arg3, 8
+ %tmp6 = getelementptr i16, i16 addrspace(1)* %tmp, i64 %tmp5
+; CHECK: ld.global.nc.u16
+ %tmp7 = load i16, i16 addrspace(1)* %tmp6, align 2
+; CHECK: cvt.s32.s16
+ %tmp8 = sext i16 %tmp7 to i64
+ %tmp9 = mul nsw i64 %tmp8, %tmp8
+ %tmp10 = load i64, i64 addrspace(1)* %tmp4, align 8
+ %tmp11 = add nsw i64 %tmp9, %tmp10
+ store i64 %tmp11, i64 addrspace(1)* %tmp4, align 8
+ ret void
+}
+
+attributes #0 = { norecurse nounwind "polly.skip.fn" }
+
+!nvvm.annotations = !{!0}
+
+!0 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i64, i64)* @spam, !"maxntidx", i64 1, !"maxntidy", i64 1, !"maxntidz", i64 1}
More information about the llvm-commits
mailing list