[llvm] [VPlan] Add ExtractLane VPInst to extract across multiple parts. (PR #148817)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 23 06:01:34 PDT 2025
================
@@ -860,6 +860,31 @@ Value *VPInstruction::generate(VPTransformState &State) {
Res = Builder.CreateOr(Res, State.get(Op));
return Builder.CreateOrReduce(Res);
}
+ case VPInstruction::ExtractLane: {
+ Value *LaneToExtract = State.get(getOperand(0), true);
+ Type *IdxTy = State.TypeAnalysis.inferScalarType(getOperand(0));
+ Value *Res = nullptr;
+ Value *RuntimeVF = getRuntimeVF(State.Builder, IdxTy, State.VF);
+
+ for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) {
+ Value *VectorStart =
+ Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));
+ Value *VectorIdx = Idx == 1
+ ? LaneToExtract
+ : Builder.CreateSub(LaneToExtract, VectorStart);
----------------
lukel97 wrote:
Will this generate extracts from negative indices? E.g. `extractlane 3, a, b` for VF = 4 will cause the extract for `b` to be at index -1. I think the index is treated as unsigned according to the LangRef, so on NEON could that be lowered to an illegal memory access? I thought the select might have blocked the poison, but we always perform two loads here:
```llvm
define i32 @f(<4 x i32> %v, i32 %idx1, i32 %idx2, i1 %b) {
%x = extractelement <4 x i32> %v, i32 %idx1
%y = extractelement <4 x i32> %v, i32 %idx2
%z = select i1 %b, i32 %x, i32 %y
ret i32 %z
}
```
```asm
f: // @f
.cfi_startproc
// %bb.0:
sub sp, sp, #16
.cfi_def_cfa_offset 16
mov x8, sp
mov x9, sp
// kill: def $w1 killed $w1 def $x1
// kill: def $w0 killed $w0 def $x0
str q0, [sp]
bfi x9, x0, #2, #2
bfi x8, x1, #2, #2
tst w2, #0x1
ldr w9, [x9]
ldr w8, [x8]
csel w0, w9, w8, ne
add sp, sp, #16
ret
```
An alternative would be to concatenate the vectors with `concatenateVectors` and then do an extractelement on the result.
https://github.com/llvm/llvm-project/pull/148817
More information about the llvm-commits
mailing list