[PATCH] D124219: [AMDGPU] Fine tune LDS misaligned access speed
Stanislav Mekhanoshin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 21 17:19:03 PDT 2022
rampitec created this revision.
rampitec added reviewers: arsenm, foad.
Herald added subscribers: hsmhsm, kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, kzhuravl.
Herald added a project: All.
rampitec requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.
https://reviews.llvm.org/D124219
Files:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1551,7 +1551,9 @@
// ds_write2_b32 depending on the alignment. In either case with either
// alignment there is no faster way of doing this.
if (IsFast)
- *IsFast = 1;
+ *IsFast = (Alignment >= RequiredAlignment) ? 64
+ : (Alignment < Align(4)) ? 32
+ : 1;
return true;
}
@@ -1570,7 +1572,9 @@
// be more of them, so overall we will pay less penalty issuing a single
// instruction.
if (IsFast)
- *IsFast = Alignment >= RequiredAlignment || Alignment < Align(4);
+ *IsFast = (Alignment >= RequiredAlignment) ? 96
+ : (Alignment < Align(4)) ? 32
+ : 1;
return true;
}
@@ -1591,7 +1595,9 @@
// will be more of them, so overall we will pay less penalty issuing a
// single instruction.
if (IsFast)
- *IsFast= Alignment >= RequiredAlignment || Alignment < Align(4);
+ *IsFast = (Alignment >= RequiredAlignment) ? 128
+ : (Alignment < Align(4)) ? 32
+ : 1;
return true;
}
@@ -1604,7 +1610,7 @@
}
if (IsFast)
- *IsFast = Alignment >= RequiredAlignment;
+ *IsFast = (Alignment >= RequiredAlignment) ? Size : 0;
return Alignment >= RequiredAlignment ||
Subtarget->hasUnalignedDSAccessEnabled();
@@ -1662,22 +1668,8 @@
bool SITargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
unsigned *IsFast) const {
- bool Allow = allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
- Alignment, Flags, IsFast);
-
- if (Allow && IsFast && Subtarget->hasUnalignedDSAccessEnabled() &&
- (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
- AddrSpace == AMDGPUAS::REGION_ADDRESS)) {
- // Lie it is fast if +unaligned-access-mode is passed so that DS accesses
- // get vectorized. We could use ds_read2_b*/ds_write2_b* instructions on a
- // misaligned data which is faster than a pair of ds_read_b*/ds_write_b*
- // which would be equally misaligned.
- // This is only used by the common passes, selection always calls the
- // allowsMisalignedMemoryAccessesImpl version.
- *IsFast= 1;
- }
-
- return Allow;
+ return allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
+ Alignment, Flags, IsFast);
}
EVT SITargetLowering::getOptimalMemOpType(
@@ -8560,7 +8552,7 @@
auto Flags = Load->getMemOperand()->getFlags();
if (allowsMisalignedMemoryAccessesImpl(MemVT.getSizeInBits(), AS,
Load->getAlign(), Flags, &Fast) &&
- Fast)
+ Fast > 1)
return SDValue();
if (MemVT.isVector())
@@ -9059,7 +9051,7 @@
auto Flags = Store->getMemOperand()->getFlags();
if (allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AS,
Store->getAlign(), Flags, &Fast) &&
- Fast)
+ Fast > 1)
return SDValue();
if (VT.isVector())
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D124219.424338.patch
Type: text/x-patch
Size: 3556 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220422/dd003616/attachment.bin>
More information about the llvm-commits mailing list