[llvm] AMDGPU: Handle demanded subvectors for readfirstlane (PR #128648)
Pravin Jagtap via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 7 02:20:59 PST 2025
================
@@ -1574,35 +1574,59 @@ Value *GCNTTIImpl::simplifyAMDGCNLaneIntrinsicDemanded(
const unsigned LastElt = DemandedElts.getActiveBits() - 1;
const unsigned MaskLen = LastElt - FirstElt + 1;
- // TODO: Handle general subvector extract.
- if (MaskLen != 1)
+ unsigned OldNumElts = VT->getNumElements();
+ if (MaskLen == OldNumElts && MaskLen != 1)
return nullptr;
Type *EltTy = VT->getElementType();
- if (!isTypeLegal(EltTy))
+ Type *NewVT = MaskLen == 1 ? EltTy : FixedVectorType::get(EltTy, MaskLen);
+
+ // Theoretically we should support these intrinsics for any legal type. Avoid
+ // introducing cases that aren't direct register types like v3i16.
+ if (!isTypeLegal(NewVT))
return nullptr;
Value *Src = II.getArgOperand(0);
- assert(FirstElt == LastElt);
- Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
-
// Make sure convergence tokens are preserved.
// TODO: CreateIntrinsic should allow directly copying bundles
SmallVector<OperandBundleDef, 2> OpBundles;
II.getOperandBundlesAsDefs(OpBundles);
Module *M = IC.Builder.GetInsertBlock()->getModule();
- Function *Remangled = Intrinsic::getOrInsertDeclaration(
- M, II.getIntrinsicID(), {Extract->getType()});
+ Function *Remangled =
+ Intrinsic::getOrInsertDeclaration(M, II.getIntrinsicID(), {NewVT});
+
+ if (MaskLen == 1) {
+ Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
+
+ // TODO: Preserve callsite attributes?
+ CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
+
+ return IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()),
+ NewCall, FirstElt);
+ }
+
+ SmallVector<int> ExtractMask(MaskLen, -1);
+ for (unsigned I = 0; I != MaskLen; ++I) {
+ if (DemandedElts[FirstElt + I])
+ ExtractMask[I] = FirstElt + I;
+ }
+
+ Value *Extract = IC.Builder.CreateShuffleVector(Src, ExtractMask);
// TODO: Preserve callsite attributes?
CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
+ SmallVector<int> InsertMask(OldNumElts, -1);
+ for (unsigned I = 0; I != MaskLen; ++I) {
+ if (DemandedElts[FirstElt + I])
+ InsertMask[FirstElt + I] = I;
----------------
pravinjagtap wrote:
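Nit: Can't this be folded into the loop above (the one that fills ExtractMask)? Both loops iterate over the same range and test the same DemandedElts bit.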
https://github.com/llvm/llvm-project/pull/128648
More information about the llvm-commits mailing list