[llvm] [NVPTX] support packed f32 instructions for sm_100+ (PR #126337)
Princeton Ferro via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 10 20:31:27 PDT 2025
================
@@ -330,22 +329,30 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
}
ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset);
- for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
- EVT VT = TempVTs[i];
- uint64_t Off = TempOffsets[i];
- // Split vectors into individual elements, except for v2f16, which
- // we will pass as a single scalar.
+ for (auto [VT, Off] : zip(TempVTs, TempOffsets)) {
+ // Split vectors into individual elements, except for packed types
if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
// We require power-of-2 sized vectors because
// TargetLoweringBase::getVectorTypeBreakdown() which is invoked in
// ComputePTXValueVTs() cannot currently break down non-power-of-2 sized
// vectors.
- if ((Is16bitsType(EltVT.getSimpleVT())) && NumElts % 2 == 0 &&
- isPowerOf2_32(NumElts)) {
- // Vectors with an even number of f16 elements will be passed to
- // us as an array of v2f16/v2bf16 elements. We must match this so we
+
+ // Special case handling for packed i8s.
+ if (EltVT.getSimpleVT() == MVT::i8 &&
+ ((NumElts % 4 == 0 && isPowerOf2_32(NumElts)) || NumElts == 3)) {
+ // v*i8 are formally lowered as v4i8
+ EltVT = MVT::v4i8;
+ NumElts = (NumElts + 3) / 4;
+ } else if (EltVT.getSimpleVT() == MVT::i8 && NumElts == 2) {
+ // v2i8 is promoted to v2i16
+ NumElts = 1;
+ EltVT = MVT::v2i8;
+ } else if (isPackedElementTy(EltVT) && NumElts % 2 == 0 &&
+ isPowerOf2_32(NumElts)) {
----------------
Prince781 wrote:
Okay, I redid this section to rely on a declared list of supported types (see changes in `NVPTXUtilities.h`) and added more comments.
https://github.com/llvm/llvm-project/pull/126337
More information about the llvm-commits mailing list