[llvm] d6aa4aa - [AMDGPU] Some refactoring after D90404. NFC.
Christudasan Devadasan via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 1 00:55:02 PDT 2020
Author: Christudasan Devadasan
Date: 2020-11-01T13:18:53+05:30
New Revision: d6aa4aa29a25fac45ed250dd34cdbf8dbe8c0eb7
URL: https://github.com/llvm/llvm-project/commit/d6aa4aa29a25fac45ed250dd34cdbf8dbe8c0eb7
DIFF: https://github.com/llvm/llvm-project/commit/d6aa4aa29a25fac45ed250dd34cdbf8dbe8c0eb7.diff
LOG: [AMDGPU] Some refactoring after D90404. NFC.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index b84191d3c03f..aa269080cebe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1566,17 +1566,25 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
return DAG.getMergeValues(Ops, SL);
}
-// Widen a vector load from vec3 to vec4.
-SDValue AMDGPUTargetLowering::WidenVectorLoad(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::WidenOrSplitVectorLoad(SDValue Op,
+ SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
EVT VT = Op.getValueType();
- assert(VT.getVectorNumElements() == 3);
SDValue BasePtr = Load->getBasePtr();
EVT MemVT = Load->getMemoryVT();
SDLoc SL(Op);
const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
unsigned BaseAlign = Load->getAlignment();
+ unsigned NumElements = MemVT.getVectorNumElements();
+
+ // Widen from vec3 to vec4 when the load is at least 8-byte aligned
+ // or 16-byte fully dereferenceable. Otherwise, split the vector load.
+ if (NumElements != 3 ||
+ (BaseAlign < 8 &&
+ !SrcValue.isDereferenceable(16, *DAG.getContext(), DAG.getDataLayout())))
+ return SplitVectorLoad(Op, DAG);
+
+ assert(NumElements == 3);
EVT WideVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index e0c9c0b341f3..ba91ee9c33eb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -124,8 +124,9 @@ class AMDGPUTargetLowering : public TargetLowering {
/// Split a vector load into 2 loads of half the vector.
SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;
- /// Widen a vector load from vec3 to vec4.
- SDValue WidenVectorLoad(SDValue Op, SelectionDAG &DAG) const;
+ /// Widen a suitably aligned v3 load. For all other cases, split the input
+ /// vector load.
+ SDValue WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;
/// Split a vector store into 2 stores of half the vector.
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 32de18c07c42..f4f56628790b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7986,7 +7986,6 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
ISD::LoadExtType ExtType = Load->getExtensionType();
EVT MemVT = Load->getMemoryVT();
- MachineMemOperand *MMO = Load->getMemOperand();
if (ExtType == ISD::NON_EXTLOAD && MemVT.getSizeInBits() < 32) {
if (MemVT == MVT::i16 && isTypeLegal(MVT::i16))
@@ -7997,6 +7996,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Load->getChain();
SDValue BasePtr = Load->getBasePtr();
+ MachineMemOperand *MMO = Load->getMemOperand();
EVT RealMemVT = (MemVT == MVT::i1) ? MVT::i8 : MVT::i16;
@@ -8052,17 +8052,13 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
unsigned NumElements = MemVT.getVectorNumElements();
- bool Is16ByteKnownDereferenceable = MMO->getPointerInfo().isDereferenceable(
- 16, *DAG.getContext(), DAG.getDataLayout());
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
if (!Op->isDivergent() && Alignment >= 4 && NumElements < 32) {
if (MemVT.isPow2VectorType())
return SDValue();
- if (NumElements == 3 && (Alignment >= 8 || Is16ByteKnownDereferenceable))
- return WidenVectorLoad(Op, DAG);
- return SplitVectorLoad(Op, DAG);
+ return WidenOrSplitVectorLoad(Op, DAG);
}
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
@@ -8078,9 +8074,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
Alignment >= 4 && NumElements < 32) {
if (MemVT.isPow2VectorType())
return SDValue();
- if (NumElements == 3 && (Alignment >= 8 || Is16ByteKnownDereferenceable))
- return WidenVectorLoad(Op, DAG);
- return SplitVectorLoad(Op, DAG);
+ return WidenOrSplitVectorLoad(Op, DAG);
}
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
@@ -8094,11 +8088,9 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
// v3 loads not supported on SI.
- if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores()) {
- if (Alignment >= 8 || Is16ByteKnownDereferenceable)
- return WidenVectorLoad(Op, DAG);
- return SplitVectorLoad(Op, DAG);
- }
+ if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
+ return WidenOrSplitVectorLoad(Op, DAG);
+
// v3 and v4 loads are supported for private and global memory.
return SDValue();
}
@@ -8121,11 +8113,9 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
// v3 loads not supported on SI.
- if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores()) {
- if (Alignment >= 8 || Is16ByteKnownDereferenceable)
- return WidenVectorLoad(Op, DAG);
- return SplitVectorLoad(Op, DAG);
- }
+ if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
+ return WidenOrSplitVectorLoad(Op, DAG);
+
return SDValue();
default:
llvm_unreachable("unsupported private_element_size");
More information about the llvm-commits mailing list