[llvm] [IA][RISCV] Support VP loads/stores in InterleavedAccessPass (PR #120490)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 31 11:26:37 PST 2025
================
@@ -22529,6 +22530,233 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
return true;
}
+static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
+ assert(N);
+ if (N == 1)
+ return true;
+
+ if (isPowerOf2_32(N)) {
+ KnownBits KB = llvm::computeKnownBits(V, DL);
+ return KB.countMinTrailingZeros() >= Log2_32(N);
+ }
+
+ using namespace PatternMatch;
+ // Right now we only recognize the simplest pattern: a multiplication whose
+ // constant operand is a non-zero multiple of N.
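+ // For example, with N == 3 this accepts `%evl = mul i32 %x, 6` (6 is a
+ // non-zero multiple of 3) but rejects `%evl = mul i32 %x, 4`.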
+ uint64_t C;
+ return match(V, m_c_Mul(m_Value(), m_ConstantInt(C))) && C && C % N == 0;
+}
+
+/// Lower an interleaved vp.load into a vlsegN intrinsic.
+///
+/// E.g. Lower an interleaved vp.load (Factor = 2):
+/// %l = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr %ptr,
+/// <vscale x 64 x i1> %mask,
+/// i32 %wide.rvl)
+/// %dl = tail call { <vscale x 32 x i8>, <vscale x 32 x i8> }
+/// @llvm.vector.deinterleave2.nxv64i8(
+/// <vscale x 64 x i8> %l)
+/// %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 0
+/// %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 1
+///
+/// Into:
+/// %rvl = udiv %wide.rvl, 2
+/// %sl = call { <vscale x 32 x i8>, <vscale x 32 x i8> }
+/// @llvm.riscv.vlseg2.mask.nxv32i8.i64(<vscale x 32 x i8> undef,
+/// <vscale x 32 x i8> undef,
+/// ptr %ptr,
+/// <vscale x 32 x i1> %mask,
+/// i64 %rvl,
+/// i64 3)
+/// %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 0
+/// %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 1
+///
+/// NOTE: the deinterleave2 intrinsic itself is left untouched; the caller is
+/// expected to remove it.
+bool RISCVTargetLowering::lowerDeinterleavedIntrinsicToVPLoad(
+ VPIntrinsic *Load, Value *Mask,
+ ArrayRef<Value *> DeinterleaveResults) const {
+ assert(Mask && "Expect a valid mask");
+ assert(Load->getIntrinsicID() == Intrinsic::vp_load &&
+ "Unexpected intrinsic");
+
+ const unsigned Factor = DeinterleaveResults.size();
+
+ auto *WideVTy = dyn_cast<ScalableVectorType>(Load->getType());
+ // TODO: Support fixed vectors.
+ if (!WideVTy)
+ return false;
+
+ unsigned WideNumElements = WideVTy->getElementCount().getKnownMinValue();
+ assert(WideNumElements % Factor == 0 &&
+ "ElementCount of a wide load must be divisible by interleave factor");
+ auto *VTy =
+ VectorType::get(WideVTy->getScalarType(), WideNumElements / Factor,
+ WideVTy->isScalableTy());
+ auto &DL = Load->getModule()->getDataLayout();
+ Align Alignment = Load->getParamAlign(0).value_or(
+ DL.getABITypeAlign(WideVTy->getElementType()));
+ if (!isLegalInterleavedAccessType(
+ VTy, Factor, Alignment,
+ Load->getArgOperand(0)->getType()->getPointerAddressSpace(), DL))
+ return false;
+
+ IRBuilder<> Builder(Load);
+ Value *WideEVL = Load->getArgOperand(2);
+ // Conservatively check that the EVL is a multiple of the factor; otherwise
+ // some (trailing) elements might be lost after the transformation.
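+ // For example, with Factor == 2 a wide EVL of 7 would be narrowed to
+ // udiv(7, 2) == 3, covering only 6 of the 7 requested lanes.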
+ if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor))
+ return false;
+
+ auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
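+ // Divide the wide EVL by the factor and canonicalize it to the XLEN-sized
+ // integer type that the segment load intrinsic takes as its VL operand.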
+ Value *EVL = Builder.CreateZExtOrTrunc(
+ Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)),
+ XLenTy);
+
+ static const Intrinsic::ID IntrMaskIds[] = {
+ Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
+ Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
+ Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
+ Intrinsic::riscv_vlseg8_mask,
+ };
+
+ unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
+ unsigned NumElts = VTy->getElementCount().getKnownMinValue();
+ Type *VecTupTy = TargetExtType::get(
+ Load->getContext(), "riscv.vector.tuple",
+ ScalableVectorType::get(Type::getInt8Ty(Load->getContext()),
+ NumElts * SEW / 8),
+ Factor);
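+ // e.g. for Factor == 2 and VTy == <vscale x 32 x i8> (SEW == 8), this is
+ //   target("riscv.vector.tuple", <vscale x 32 x i8>, 2)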
+
+ Value *PoisonVal = PoisonValue::get(VecTupTy);
+
+ Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration(
+ Load->getModule(), IntrMaskIds[Factor - 2],
+ {VecTupTy, Mask->getType(), EVL->getType()});
+
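+ // Operands of the masked vlsegN intrinsic, in order: passthru tuple, base
+ // pointer, mask, EVL, policy, and log2 of the element width (SEW).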
+ Value *Operands[] = {
+ PoisonVal,
+ Load->getArgOperand(0),
+ Mask,
+ EVL,
+ ConstantInt::get(XLenTy, RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC),
+ ConstantInt::get(XLenTy, Log2_64(SEW))};
+
+ CallInst *VlsegN = Builder.CreateCall(VlsegNFunc, Operands);
+
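+ // The results are returned as a { VTy, VTy, ... } struct; the count-value
+ // constructor below creates Factor copies of VTy.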
+ SmallVector<Type *, 8> AggrTypes{Factor, VTy};
+ Value *Return =
+ PoisonValue::get(StructType::get(Load->getContext(), AggrTypes));
+ Function *VecExtractFunc = Intrinsic::getOrInsertDeclaration(
+ Load->getModule(), Intrinsic::riscv_tuple_extract, {VTy, VecTupTy});
+ for (unsigned i = 0; i < Factor; ++i) {
+ Value *VecExtract =
+ Builder.CreateCall(VecExtractFunc, {VlsegN, Builder.getInt32(i)});
+ Return = Builder.CreateInsertValue(Return, VecExtract, i);
+ }
+
+ for (auto [Idx, DIO] : enumerate(DeinterleaveResults)) {
+ // Create a fresh extractvalue for each of the original deinterleave
+ // results and rewrite all uses of the old instructions to it.
+ Value *NewEV =
+ Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
+ DIO->replaceAllUsesWith(NewEV);
+ }
+
+ return true;
+}
+
+/// Lower an interleaved vp.store into a vssegN intrinsic.
+///
+/// E.g. Lower an interleaved vp.store (Factor = 2):
+///
+/// %is = tail call <vscale x 64 x i8>
+/// @llvm.vector.interleave2.nxv64i8(
+/// <vscale x 32 x i8> %load0,
+/// <vscale x 32 x i8> %load1)
+/// %wide.rvl = shl nuw nsw i32 %rvl, 1
+/// tail call void @llvm.vp.store.nxv64i8.p0(
+/// <vscale x 64 x i8> %is, ptr %ptr,
+/// <vscale x 64 x i1> %mask,
+/// i32 %wide.rvl)
+///
+/// Into:
+/// call void @llvm.riscv.vsseg2.mask.nxv32i8.i64(
+/// <vscale x 32 x i8> %load0,
+/// <vscale x 32 x i8> %load1, ptr %ptr,
+/// <vscale x 32 x i1> %mask,
+/// i64 %rvl)
+bool RISCVTargetLowering::lowerInterleavedIntrinsicToVPStore(
+ VPIntrinsic *Store, Value *Mask,
+ ArrayRef<Value *> InterleaveOperands) const {
+ assert(Mask && "Expect a valid mask");
+ assert(Store->getIntrinsicID() == Intrinsic::vp_store &&
+ "Unexpected intrinsic");
+
+ const unsigned Factor = InterleaveOperands.size();
+
+ auto *VTy = dyn_cast<ScalableVectorType>(InterleaveOperands[0]->getType());
+ // TODO: Support fixed vectors.
+ if (!VTy)
+ return false;
+
+ const DataLayout &DL = Store->getDataLayout();
+ Align Alignment = Store->getParamAlign(1).value_or(
+ DL.getABITypeAlign(VTy->getElementType()));
+ if (!isLegalInterleavedAccessType(
+ VTy, Factor, Alignment,
+ Store->getArgOperand(1)->getType()->getPointerAddressSpace(), DL))
+ return false;
+
+ IRBuilder<> Builder(Store);
+ Value *WideEVL = Store->getArgOperand(3);
+ // Conservatively check that the EVL is a multiple of the factor; otherwise
+ // some (trailing) elements might be lost after the transformation.
+ if (!isMultipleOfN(WideEVL, Store->getDataLayout(), Factor))
+ return false;
+
+ auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
+ Value *EVL = Builder.CreateZExtOrTrunc(
+ Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)),
+ XLenTy);
+
+ static const Intrinsic::ID IntrMaskIds[] = {
+ Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
+ Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
+ Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
+ Intrinsic::riscv_vsseg8_mask,
+ };
+
+ unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
+ unsigned NumElts = VTy->getElementCount().getKnownMinValue();
+ Type *VecTupTy = TargetExtType::get(
+ Store->getContext(), "riscv.vector.tuple",
+ ScalableVectorType::get(Type::getInt8Ty(Store->getContext()),
+ NumElts * SEW / 8),
+ Factor);
+
+ Function *VecInsertFunc = Intrinsic::getOrInsertDeclaration(
+ Store->getModule(), Intrinsic::riscv_tuple_insert, {VecTupTy, VTy});
+ Value *StoredVal = PoisonValue::get(VecTupTy);
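+ // Pack the interleaved operands into the vector tuple one segment at a
+ // time using riscv.tuple.insert.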
+ for (unsigned i = 0; i < Factor; ++i)
+ StoredVal = Builder.CreateCall(
+ VecInsertFunc, {StoredVal, InterleaveOperands[i], Builder.getInt32(i)});
+
+ SmallVector<Value *, 5> Operands;
+ Operands.push_back(StoredVal);
+ Operands.push_back(Store->getArgOperand(1));
----------------
mshockwave wrote:
Fixed.
https://github.com/llvm/llvm-project/pull/120490