[llvm-branch-commits] [llvm-branch] r339105 - Merging r338610:
Hans Wennborg via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Aug 7 00:37:32 PDT 2018
Author: hans
Date: Tue Aug 7 00:37:32 2018
New Revision: 339105
URL: http://llvm.org/viewvc/llvm-project?rev=339105&view=rev
Log:
Merging r338610:
------------------------------------------------------------------------
r338610 | jvesely | 2018-08-01 20:36:07 +0200 (Wed, 01 Aug 2018) | 3 lines
AMDGPU/R600: Convert kernel param loads to use PARAM_I_ADDRESS
Non ext aligned i32 loads are still optimized to use CONSTANT_BUFFER (AS 8)
------------------------------------------------------------------------
Modified:
llvm/branches/release_70/ (props changed)
llvm/branches/release_70/lib/Target/AMDGPU/R600ISelLowering.cpp
llvm/branches/release_70/lib/Target/AMDGPU/R600ISelLowering.h
llvm/branches/release_70/test/CodeGen/AMDGPU/kernel-args.ll
llvm/branches/release_70/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
Propchange: llvm/branches/release_70/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Aug 7 00:37:32 2018
@@ -1,3 +1,3 @@
/llvm/branches/Apple/Pertwee:110850,110961
/llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,338552,338554,338569,338599,338658,338665,338682,338703,338709,338751,338762,338817,338968
+/llvm/trunk:155241,338552,338554,338569,338599,338610,338658,338665,338682,338703,338709,338751,338762,338817,338968
Modified: llvm/branches/release_70/lib/Target/AMDGPU/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_70/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=339105&r1=339104&r2=339105&view=diff
==============================================================================
--- llvm/branches/release_70/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
+++ llvm/branches/release_70/lib/Target/AMDGPU/R600ISelLowering.cpp Tue Aug 7 00:37:32 2018
@@ -903,7 +903,7 @@ SDValue R600TargetLowering::LowerImplici
unsigned DwordOffset) const {
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUASI.CONSTANT_BUFFER_0);
+ AMDGPUASI.PARAM_I_ADDRESS);
// We shouldn't be using an offset wider than 16-bits for implicit parameters.
assert(isInt<16>(ByteOffset));
@@ -1457,33 +1457,17 @@ SDValue R600TargetLowering::LowerLOAD(SD
return scalarizeVectorLoad(LoadNode, DAG);
}
+ // This is still used for explicit load from addrspace(8)
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
if (ConstantBlock > -1 &&
((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
(LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
SDValue Result;
- if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
- isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
+ if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
isa<ConstantSDNode>(Ptr)) {
- SDValue Slots[4];
- for (unsigned i = 0; i < 4; i++) {
- // We want Const position encoded with the following formula :
- // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
- // const_index is Ptr computed by llvm using an alignment of 16.
- // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
- // then div by 4 at the ISel step
- SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
- Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
- }
- EVT NewVT = MVT::v4i32;
- unsigned NumElements = 4;
- if (VT.isVector()) {
- NewVT = VT;
- NumElements = VT.getVectorNumElements();
- }
- Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
+ return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
} else {
+ //TODO: Does this even work?
// non-constant ptr can't be folded, keeps it as a v4f32 load
Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
@@ -1622,7 +1606,7 @@ SDValue R600TargetLowering::LowerFormalA
}
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUASI.CONSTANT_BUFFER_0);
+ AMDGPUASI.PARAM_I_ADDRESS);
// i64 isn't a legal type, so the register type used ends up as i32, which
// isn't expected here. It attempts to create this sextload, but it ends up
@@ -1646,17 +1630,17 @@ SDValue R600TargetLowering::LowerFormalA
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
+ unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
SDValue Arg = DAG.getLoad(
ISD::UNINDEXED, Ext, VT, DL, Chain,
DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
PtrInfo,
- MemVT, /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
+ MemVT, Alignment, MachineMemOperand::MONonTemporal |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
- // 4 is the preferred alignment for the CONSTANT memory space.
InVals.push_back(Arg);
}
return Chain;
@@ -1804,6 +1788,52 @@ SDValue R600TargetLowering::OptimizeSwiz
return BuildVector;
}
+SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
+ SelectionDAG &DAG) const {
+ SDLoc DL(LoadNode);
+ EVT VT = LoadNode->getValueType(0);
+ SDValue Chain = LoadNode->getChain();
+ SDValue Ptr = LoadNode->getBasePtr();
+ assert (isa<ConstantSDNode>(Ptr));
+
+ //TODO: Support smaller loads
+ if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
+ return SDValue();
+
+ if (LoadNode->getAlignment() < 4)
+ return SDValue();
+
+ int ConstantBlock = ConstantAddressBlock(Block);
+
+ SDValue Slots[4];
+ for (unsigned i = 0; i < 4; i++) {
+ // We want Const position encoded with the following formula :
+ // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
+ // const_index is Ptr computed by llvm using an alignment of 16.
+ // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
+ // then div by 4 at the ISel step
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
+ Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
+ }
+ EVT NewVT = MVT::v4i32;
+ unsigned NumElements = 4;
+ if (VT.isVector()) {
+ NewVT = VT;
+ NumElements = VT.getVectorNumElements();
+ }
+ SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
+ if (!VT.isVector()) {
+ Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
+ DAG.getConstant(0, DL, MVT::i32));
+ }
+ SDValue MergedValues[2] = {
+ Result,
+ Chain
+ };
+ return DAG.getMergeValues(MergedValues, DL);
+}
+
//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//
@@ -2022,6 +2052,16 @@ SDValue R600TargetLowering::PerformDAGCo
NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
}
+
+ case ISD::LOAD: {
+ LoadSDNode *LoadNode = cast<LoadSDNode>(N);
+ SDValue Ptr = LoadNode->getBasePtr();
+ if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
+ isa<ConstantSDNode>(Ptr))
+ return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
+ break;
+ }
+
default: break;
}
Modified: llvm/branches/release_70/lib/Target/AMDGPU/R600ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_70/lib/Target/AMDGPU/R600ISelLowering.h?rev=339105&r1=339104&r2=339105&view=diff
==============================================================================
--- llvm/branches/release_70/lib/Target/AMDGPU/R600ISelLowering.h (original)
+++ llvm/branches/release_70/lib/Target/AMDGPU/R600ISelLowering.h Tue Aug 7 00:37:32 2018
@@ -98,9 +98,11 @@ private:
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
- bool FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src,
- SDValue &Neg, SDValue &Abs, SDValue &Sel, SDValue &Imm,
- SelectionDAG &DAG) const;
+ bool FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src,
+ SDValue &Neg, SDValue &Abs, SDValue &Sel, SDValue &Imm,
+ SelectionDAG &DAG) const;
+ SDValue constBufferLoad(LoadSDNode *LoadNode, int Block,
+ SelectionDAG &DAG) const;
SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
};
Modified: llvm/branches/release_70/test/CodeGen/AMDGPU/kernel-args.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_70/test/CodeGen/AMDGPU/kernel-args.ll?rev=339105&r1=339104&r2=339105&view=diff
==============================================================================
--- llvm/branches/release_70/test/CodeGen/AMDGPU/kernel-args.ll (original)
+++ llvm/branches/release_70/test/CodeGen/AMDGPU/kernel-args.ll Tue Aug 7 00:37:32 2018
@@ -16,13 +16,8 @@
; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
-; EG: LSHR T0.X, KC0[2].Y, literal.x,
-; EG-NEXT: MOV * T1.X, KC0[2].Z,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-
-; CM: LSHR * T0.X, KC0[2].Y, literal.x,
-; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: MOV * T1.X, KC0[2].Z,
+; EGCM: VTX_READ_8{{.*}} #3
+; EGCM: KC0[2].Y
define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
%ext = zext i8 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
@@ -92,14 +87,8 @@ define amdgpu_kernel void @i8_sext_arg(i
; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff{{$}}
; HSA-VI: flat_store_dword
-
-; EG: LSHR T0.X, KC0[2].Y, literal.x,
-; EG-NEXT: MOV * T1.X, KC0[2].Z,
-; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-
-; CM: LSHR * T0.X, KC0[2].Y, literal.x,
-; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-; CM-NEXT: MOV * T1.X, KC0[2].Z,
+; EGCM: VTX_READ_16
+; EGCM: KC0[2].Y
define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
%ext = zext i16 %in to i32
store i32 %ext, i32 addrspace(1)* %out, align 4
Modified: llvm/branches/release_70/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_70/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll?rev=339105&r1=339104&r2=339105&view=diff
==============================================================================
--- llvm/branches/release_70/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll (original)
+++ llvm/branches/release_70/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll Tue Aug 7 00:37:32 2018
@@ -60,8 +60,11 @@ entry:
}
; FUNC-LABEL: {{^}}test_implicit:
-; 36 prepended implicit bytes + 4(out pointer) + 4*4 = 56
-; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 56
+; 36 prepended implicit bytes + 4(out pointer) + 4*4 = 56 == KC0[3].Z
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+.[XYZW]]], [[PTR:T[0-9]+.[XYZW]]]
+; EG-NOT: VTX_READ
+; EG-DAG: MOV {{\*?}} [[VAL]], KC0[3].Z
+; EG-DAG: LSHR {{\*? *}}[[PTR]], KC0[2].Y, literal
define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
%implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
%header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
@@ -73,7 +76,7 @@ define amdgpu_kernel void @test_implicit
; FUNC-LABEL: {{^}}test_implicit_dyn:
; 36 prepended implicit bytes + 8(out pointer + in) = 44
-; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 44
+; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 44, #3
define amdgpu_kernel void @test_implicit_dyn(i32 addrspace(1)* %out, i32 %in) #1 {
%implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
%header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
More information about the llvm-branch-commits
mailing list