[llvm] 099dcb6 - [AMDGPU] Refactor ds_read/ds_write related select code for better readability.
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 15 19:54:39 PDT 2021
Author: hsmahesha
Date: 2021-04-16T08:24:00+05:30
New Revision: 099dcb68a6a1d00f2170878443f48311368d5c32
URL: https://github.com/llvm/llvm-project/commit/099dcb68a6a1d00f2170878443f48311368d5c32
DIFF: https://github.com/llvm/llvm-project/commit/099dcb68a6a1d00f2170878443f48311368d5c32.diff
LOG: [AMDGPU] Refactor ds_read/ds_write related select code for better readability.
Part of the code related to ds_read/ds_write ISel is refactored, and the
corresponding comments are rewritten for better readability. This will help
when implementing any future ds_read/ds_write ISel-related modifications.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D100300
Added:
Modified:
llvm/lib/Target/AMDGPU/DSInstructions.td
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 2e20b671f2389..ddd37db5080a1 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -713,39 +713,6 @@ defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
-// Prefer ds_read over ds_read2, all other things being equal, because it has
-// a larger immediate offset range.
-let AddedComplexity = 100 in {
-
-foreach vt = VReg_64.RegTypes in {
-defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">;
-}
-
-let SubtargetPredicate = isGFX7Plus in {
-
-foreach vt = VReg_96.RegTypes in {
-defm : DSReadPat_mc <DS_READ_B96, vt, "load_align16_local">;
-}
-
-// For performance reasons restrict this to alignment >= 16 even with
-// unaligned-access-mode. At lower alignments ds_read2_b64 is always a better
-// choice.
-foreach vt = VReg_128.RegTypes in {
-defm : DSReadPat_mc <DS_READ_B128, vt, "load_align16_local">;
-}
-
-let SubtargetPredicate = HasUnalignedAccessMode in {
-
-foreach vt = VReg_96.RegTypes in {
-defm : DSReadPat_mc <DS_READ_B96, vt, "load_local">;
-}
-
-} // End SubtargetPredicate = HasUnalignedAccessMode
-
-} // End SubtargetPredicate = isGFX7Plus
-
-} // End AddedComplexity = 100
-
let OtherPredicates = [D16PreservesUnusedBits] in {
def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2i16>;
def : DSReadPat_D16<DS_READ_U16_D16_HI, load_d16_hi_local, v2f16>;
@@ -869,33 +836,51 @@ foreach vt = VReg_128.RegTypes in {
defm : DS128Bit8ByteAlignedPat_mc<vt>;
}
-// Prefer ds_write over ds_write2, all other things being equal, because it has
-// a larger immediate offset range.
+// Prefer ds_read over ds_read2 and ds_write over ds_write2, all other things
+// being equal, because they have a larger immediate offset range.
let AddedComplexity = 100 in {
+foreach vt = VReg_64.RegTypes in {
+defm : DSReadPat_mc <DS_READ_B64, vt, "load_align8_local">;
+}
+
foreach vt = VReg_64.RegTypes in {
defm : DSWritePat_mc <DS_WRITE_B64, vt, "store_align8_local">;
}
let SubtargetPredicate = isGFX7Plus in {
+foreach vt = VReg_96.RegTypes in {
+defm : DSReadPat_mc <DS_READ_B96, vt, "load_align16_local">;
+}
+
foreach vt = VReg_96.RegTypes in {
defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_align16_local">;
}
-// For performance reasons restrict this to alignment >= 16 even with
-// unaligned-access-mode. At lower alignments ds_write2_b64 is always a better
-// choice.
+foreach vt = VReg_128.RegTypes in {
+defm : DSReadPat_mc <DS_READ_B128, vt, "load_align16_local">;
+}
+
foreach vt = VReg_128.RegTypes in {
defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align16_local">;
}
let SubtargetPredicate = HasUnalignedAccessMode in {
+// FIXME: Is ds_read_b96/ds_write_b96 a better choice in unaligned-access-mode?
+foreach vt = VReg_96.RegTypes in {
+defm : DSReadPat_mc <DS_READ_B96, vt, "load_local">;
+}
+
foreach vt = VReg_96.RegTypes in {
defm : DSWritePat_mc <DS_WRITE_B96, vt, "store_local">;
}
+// For performance reasons, *do not* select ds_read_b128/ds_write_b128 in
+// unaligned-access-mode. At lower alignments ds_read2_b64/ds_write2_b64 is
+// always a better choice.
+
} // End SubtargetPredicate = HasUnalignedAccessMode
} // End SubtargetPredicate = isGFX7Plus
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index eb6372e811dd1..726bc87d5a162 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1412,10 +1412,15 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
return true;
}
+  // Either the alignment requirements are "enabled", or there is an
+  // unaligned-LDS-access hardware bug even though alignment requirements
+  // are "disabled". In either case, we need to check for proper
+  // alignment.
+ //
if (Size == 64) {
- // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
- // aligned, 8 byte access in a single operation using ds_read2/write2_b32
- // with adjacent offsets.
+    // Accessing 8 bytes via ds_read/write_b64 requires 8-byte alignment, but
+    // we can do a 4-byte-aligned, 8-byte access in a single operation using
+    // ds_read2/write2_b32 with adjacent offsets.
bool AlignedBy4 = Alignment >= Align(4);
if (IsFast)
*IsFast = AlignedBy4;
@@ -1423,22 +1428,23 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
return AlignedBy4;
}
if (Size == 96) {
- // ds_read/write_b96 require 16-byte alignment on gfx8 and older.
- bool Aligned = Alignment >= Align(16);
+    // Accessing 12 bytes via ds_read/write_b96 requires 16-byte alignment on
+    // gfx8 and older.
+ bool AlignedBy16 = Alignment >= Align(16);
if (IsFast)
- *IsFast = Aligned;
+ *IsFast = AlignedBy16;
- return Aligned;
+ return AlignedBy16;
}
if (Size == 128) {
- // ds_read/write_b128 require 16-byte alignment on gfx8 and older, but we
- // can do a 8 byte aligned, 16 byte access in a single operation using
- // ds_read2/write2_b64.
- bool Aligned = Alignment >= Align(8);
+    // Accessing 16 bytes via ds_read/write_b128 requires 16-byte alignment on
+    // gfx8 and older, but we can do an 8-byte-aligned, 16-byte access in a
+    // single operation using ds_read2/write2_b64.
+ bool AlignedBy8 = Alignment >= Align(8);
if (IsFast)
- *IsFast = Aligned;
+ *IsFast = AlignedBy8;
- return Aligned;
+ return AlignedBy8;
}
}
More information about the llvm-commits
mailing list