[llvm] [AArch64][ARM] Optimize more `tbl`/`tbx` calls into `shufflevector` (PR #169748)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 4 10:11:24 PST 2025
================
@@ -737,42 +737,122 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
return nullptr;
}
-/// Convert a table lookup to shufflevector if the mask is constant.
-/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
-/// which case we could lower the shufflevector with rev64 instructions
-/// as it's actually a byte reverse.
-static Value *simplifyNeonTbl1(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
+/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
+/// at most two source operands are actually referenced.
+static Instruction *simplifyNeonTbl(IntrinsicInst &II, InstCombiner &IC,
+ bool IsExtension) {
// Bail out if the mask is not a constant.
- auto *C = dyn_cast<Constant>(II.getArgOperand(1));
+ auto *C = dyn_cast<Constant>(II.getArgOperand(II.arg_size() - 1));
if (!C)
return nullptr;
- auto *VecTy = cast<FixedVectorType>(II.getType());
- unsigned NumElts = VecTy->getNumElements();
+ auto *RetTy = cast<FixedVectorType>(II.getType());
+ unsigned NumIndexes = RetTy->getNumElements();
- // Only perform this transformation for <8 x i8> vector types.
- if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
+ // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
+ if (!(RetTy->getElementType()->isIntegerTy(8) &&
+ (NumIndexes == 8 || NumIndexes == 16)))
return nullptr;
- int Indexes[8];
+ // For tbx instructions, the first argument is the "fallback" vector, which
+ // has the same length as the mask and return type.
+ unsigned int StartIndex = (unsigned)IsExtension;
+ auto *SourceTy =
+ cast<FixedVectorType>(II.getArgOperand(StartIndex)->getType());
+ // Note that the element count of each source vector does *not* need to be the
+ // same as the element count of the return type and mask! All source vectors
+ // must have the same element count as each other, though.
+ unsigned NumElementsPerSource = SourceTy->getNumElements();
+
+ // There are no tbl/tbx intrinsics for which the destination size exceeds the
+ // source size. However, our definitions of the intrinsics, at least in
+ // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
+ // *could* technically happen.
+ if (NumIndexes > NumElementsPerSource) {
+ return nullptr;
+ }
+
+ // The tbl/tbx intrinsics take several source operands followed by a mask
+ // operand.
+ unsigned int NumSourceOperands = II.arg_size() - 1 - (unsigned)IsExtension;
- for (unsigned I = 0; I < NumElts; ++I) {
+ // Map input operands to shuffle indices. This also helpfully deduplicates the
+ // input arguments, in case the same value is passed as an argument multiple
+ // times.
+ SmallDenseMap<Value *, unsigned, 2> ValueToShuffleSlot;
+ Value *ShuffleOperands[2] = {PoisonValue::get(SourceTy),
+ PoisonValue::get(SourceTy)};
+
+ int Indexes[16];
+ for (unsigned I = 0; I < NumIndexes; ++I) {
Constant *COp = C->getAggregateElement(I);
- if (!COp || !isa<ConstantInt>(COp))
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
return nullptr;
- Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
+ if (isa<UndefValue>(COp)) {
+ Indexes[I] = -1;
+ continue;
+ }
- // Make sure the mask indices are in range.
- if ((unsigned)Indexes[I] >= NumElts)
+ uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
+ // The index of the input argument that this index references (0 = first
+ // source argument, etc).
+ unsigned SourceOperandIndex = Index / NumElementsPerSource;
+ // The index of the element at that source operand.
+ unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
+
+ Value *SourceOperand;
+ if (SourceOperandIndex >= NumSourceOperands) {
+ // This index is out of bounds. Map it to index into either the fallback
+ // vector (tbx) or vector of zeroes (tbl).
+ SourceOperandIndex = NumSourceOperands;
+ if (IsExtension) {
+ // For out-of-bounds indices in tbx, choose the `I`th element of the
+ // fallback.
+ SourceOperand = II.getArgOperand(0);
+ SourceOperandElementIndex = I;
+ } else {
+ // Otherwise, choose some element from the dummy vector of zeroes (we'll
+ // always choose the first).
+ SourceOperand = Constant::getNullValue(SourceTy);
+ SourceOperandElementIndex = 0;
+ }
+ } else {
+ SourceOperand = II.getArgOperand(SourceOperandIndex + StartIndex);
+ }
+
+ // The source operand may be the fallback vector, which may not have the
+ // same number of elements as the source vector. In that case, we *could*
+ // choose to extend its length with another shufflevector, but it's simpler
+ // to just bail instead.
+ if (cast<FixedVectorType>(SourceOperand->getType())->getNumElements() !=
+ NumElementsPerSource) {
return nullptr;
+ }
+
+ // We now know the source operand referenced by this index. Make it a
+ // shufflevector operand, if it isn't already.
+ unsigned NumSlots = ValueToShuffleSlot.size();
+ // This shuffle references more than two sources, and hence cannot be
+ // represented as a shufflevector.
+ if (NumSlots == 2 && !ValueToShuffleSlot.contains(SourceOperand)) {
----------------
davemgreen wrote:
The braces around single-statement `if` bodies can be dropped in a few places here too.
https://github.com/llvm/llvm-project/pull/169748
More information about the llvm-commits mailing list