[llvm] [AMDGPUInstCombineIntrinsic] Do not narrow 8,16-bit amdgcn_s_buffer_load intrinsics (PR #117997)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 2 15:49:27 PST 2024
================
@@ -1397,15 +1400,37 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
if (OffsetIdx != InvalidOffsetIdx) {
// Clear demanded bits and update the offset.
DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
- auto *Offset = Args[OffsetIdx];
- unsigned SingleComponentSizeInBits =
- IC.getDataLayout().getTypeSizeInBits(EltTy);
- unsigned OffsetAdd =
- UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
- auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
- Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
+ OffsetAdd = UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
}
}
+
+ unsigned NewLoadWidthInBits =
+ SingleComponentSizeInBits * DemandedElts.popcount();
+ if (II.getIntrinsicID() == Intrinsic::amdgcn_s_buffer_load &&
+ NewLoadWidthInBits < 32) {
+ // From the GCN gen3 manual, section 7.4 (Scalar Memory Operations /
+ // Alignment and Bounds Checking) Memory Address - If the memory
+ // address is out-of-range (clamped), the operation is not performed
+ // for any dwords that are out-of-range.
+ //
+ // If we narrow a partially out-of-range <i16x2> load to i16; the i16
----------------
arsenm wrote:
`<2 x i16>`
https://github.com/llvm/llvm-project/pull/117997
More information about the llvm-commits
mailing list