[llvm] r265389 - [NVPTX] Handle ldg created from sign-/zero-extended load
Justin Holewinski via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 5 05:38:01 PDT 2016
Author: jholewinski
Date: Tue Apr 5 07:38:01 2016
New Revision: 265389
URL: http://llvm.org/viewvc/llvm-project?rev=265389&view=rev
Log:
[NVPTX] Handle ldg created from sign-/zero-extended load
Reviewers: jingyue
Subscribers: jholewinski
Differential Revision: http://reviews.llvm.org/D18053
Added:
llvm/trunk/test/CodeGen/NVPTX/bug26185.ll
Modified:
llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp?rev=265389&r1=265388&r2=265389&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp Tue Apr 5 07:38:01 2016
@@ -1286,7 +1286,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(
MemSDNode *Mem;
bool IsLDG = true;
- // If this is an LDG intrinsic, the address is the third operand. Its its an
+ // If this is an LDG intrinsic, the address is the third operand. If it's an
// LDG/LDU SD node (from custom vector handling), then its the second operand
if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
Op1 = N->getOperand(2);
@@ -1317,10 +1317,23 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(
SDValue Base, Offset, Addr;
EVT EltVT = Mem->getMemoryVT();
+ unsigned NumElts = 1;
if (EltVT.isVector()) {
+ NumElts = EltVT.getVectorNumElements();
EltVT = EltVT.getVectorElementType();
}
+ // Build the "promoted" result VTList for the load. If we are really loading
+ // i8s, then the return type will be promoted to i16 since we do not expose
+ // 8-bit registers in NVPTX.
+ EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
+ SmallVector<EVT, 5> InstVTs;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ InstVTs.push_back(NodeVT);
+ }
+ InstVTs.push_back(MVT::Other);
+ SDVTList InstVTList = CurDAG->getVTList(InstVTs);
+
if (SelectDirectAddr(Op1, Addr)) {
switch (N->getOpcode()) {
default:
@@ -1461,7 +1474,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(
}
SDValue Ops[] = { Addr, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
+ LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
} else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
if (TM.is64Bit()) {
@@ -1750,7 +1763,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(
SDValue Ops[] = { Base, Offset, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
+ LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
} else {
if (TM.is64Bit()) {
switch (N->getOpcode()) {
@@ -2037,13 +2050,77 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(
}
SDValue Ops[] = { Op1, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
+ LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
}
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = Mem->getMemOperand();
cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ // For automatic generation of LDG (through SelectLoad[Vector], not the
+ // intrinsics), we may have an extending load like:
+ //
+ // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
+ //
+ // Since we load an i8 value, the matching logic above will have selected an
+ // LDG instruction that reads i8 and stores it in an i16 register (NVPTX does
+ // not expose 8-bit registers):
+ //
+ // i16,ch = INT_PTX_LDG_GLOBAL_i8areg64 t7, t0
+ //
+ // To get the correct type in this case, truncate back to i8 and then extend
+ // to the original load type.
+ EVT OrigType = N->getValueType(0);
+ LoadSDNode *LDSD = dyn_cast<LoadSDNode>(N);
+ if (LDSD && EltVT == MVT::i8 && OrigType.getScalarSizeInBits() >= 32) {
+ unsigned CvtOpc = 0;
+
+ switch (LDSD->getExtensionType()) {
+ default:
+ llvm_unreachable("An extension is required for i8 loads");
+ break;
+ case ISD::SEXTLOAD:
+ switch (OrigType.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("Unhandled integer load type");
+ break;
+ case MVT::i32:
+ CvtOpc = NVPTX::CVT_s32_s8;
+ break;
+ case MVT::i64:
+ CvtOpc = NVPTX::CVT_s64_s8;
+ break;
+ }
+ break;
+ case ISD::EXTLOAD:
+ case ISD::ZEXTLOAD:
+ switch (OrigType.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("Unhandled integer load type");
+ break;
+ case MVT::i32:
+ CvtOpc = NVPTX::CVT_u32_u8;
+ break;
+ case MVT::i64:
+ CvtOpc = NVPTX::CVT_u64_u8;
+ break;
+ }
+ break;
+ }
+
+ // For each output value, truncate to i8 (since the upper 8 bits are
+ // undefined) and then extend to the desired type.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Res(LD, i);
+ SDValue OrigVal(N, i);
+
+ SDNode *CvtNode =
+ CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
+ CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32));
+ ReplaceUses(OrigVal, SDValue(CvtNode, 0));
+ }
+ }
+
return LD;
}
Modified: llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td?rev=265389&r1=265388&r2=265389&view=diff
==============================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td (original)
+++ llvm/trunk/lib/Target/NVPTX/NVPTXInstrInfo.td Tue Apr 5 07:38:01 2016
@@ -319,6 +319,16 @@ let hasSideEffects = 0 in {
// takes a CvtMode immediate that defines the conversion mode to use. It can
// be CvtNONE to omit a conversion mode.
multiclass CVT_FROM_ALL<string FromName, RegisterClass RC> {
+ def _s8 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Int16Regs:$src, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
+ FromName, ".s8\t$dst, $src;"), []>;
+ def _u8 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Int16Regs:$src, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
+ FromName, ".u8\t$dst, $src;"), []>;
def _s16 :
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
Added: llvm/trunk/test/CodeGen/NVPTX/bug26185.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/NVPTX/bug26185.ll?rev=265389&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/NVPTX/bug26185.ll (added)
+++ llvm/trunk/test/CodeGen/NVPTX/bug26185.ll Tue Apr 5 07:38:01 2016
@@ -0,0 +1,57 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s
+
+; Verify that we correctly emit code for i8 ldg/ldu. We do not expose 8-bit
+; registers in the backend, so these loads need special handling.
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+; CHECK-LABEL: ex_zext
+define void @ex_zext(i8* noalias readonly %data, i32* %res) {
+entry:
+; CHECK: ld.global.nc.u8
+ %val = load i8, i8* %data
+; CHECK: cvt.u32.u8
+ %valext = zext i8 %val to i32
+ store i32 %valext, i32* %res
+ ret void
+}
+
+; CHECK-LABEL: ex_sext
+define void @ex_sext(i8* noalias readonly %data, i32* %res) {
+entry:
+; CHECK: ld.global.nc.u8
+ %val = load i8, i8* %data
+; CHECK: cvt.s32.s8
+ %valext = sext i8 %val to i32
+ store i32 %valext, i32* %res
+ ret void
+}
+
+; CHECK-LABEL: ex_zext_v2
+define void @ex_zext_v2(<2 x i8>* noalias readonly %data, <2 x i32>* %res) {
+entry:
+; CHECK: ld.global.nc.v2.u8
+ %val = load <2 x i8>, <2 x i8>* %data
+; CHECK: cvt.u32.u16
+ %valext = zext <2 x i8> %val to <2 x i32>
+ store <2 x i32> %valext, <2 x i32>* %res
+ ret void
+}
+
+; CHECK-LABEL: ex_sext_v2
+define void @ex_sext_v2(<2 x i8>* noalias readonly %data, <2 x i32>* %res) {
+entry:
+; CHECK: ld.global.nc.v2.u8
+ %val = load <2 x i8>, <2 x i8>* %data
+; CHECK: cvt.s32.s8
+ %valext = sext <2 x i8> %val to <2 x i32>
+ store <2 x i32> %valext, <2 x i32>* %res
+ ret void
+}
+
+!nvvm.annotations = !{!0,!1,!2,!3}
+!0 = !{void (i8*, i32*)* @ex_zext, !"kernel", i32 1}
+!1 = !{void (i8*, i32*)* @ex_sext, !"kernel", i32 1}
+!2 = !{void (<2 x i8>*, <2 x i32>*)* @ex_zext_v2, !"kernel", i32 1}
+!3 = !{void (<2 x i8>*, <2 x i32>*)* @ex_sext_v2, !"kernel", i32 1}
More information about the llvm-commits
mailing list