[llvm] [VPlan] Add ExtractLane VPInst to extract across multiple parts. (PR #148817)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 23 06:01:34 PDT 2025


================
@@ -860,6 +860,31 @@ Value *VPInstruction::generate(VPTransformState &State) {
       Res = Builder.CreateOr(Res, State.get(Op));
     return Builder.CreateOrReduce(Res);
   }
+  case VPInstruction::ExtractLane: {
+    Value *LaneToExtract = State.get(getOperand(0), true);
+    Type *IdxTy = State.TypeAnalysis.inferScalarType(getOperand(0));
+    Value *Res = nullptr;
+    Value *RuntimeVF = getRuntimeVF(State.Builder, IdxTy, State.VF);
+
+    for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) {
+      Value *VectorStart =
+          Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));
+      Value *VectorIdx = Idx == 1
+                             ? LaneToExtract
+                             : Builder.CreateSub(LaneToExtract, VectorStart);
----------------
lukel97 wrote:

Will this generate extracts from negative indices? E.g. `extractlane 3, a, b` for VF = 4 will cause the extract from `b` to be at index -1. I think the index is treated as unsigned according to the LangRef, so on NEON would that be lowered to an illegal memory access? I thought the select might have blocked the poison, but we always perform both loads, e.g. here:

```llvm
define i32 @f(<4 x i32> %v, i32 %idx1, i32 %idx2, i1 %b) {
  %x = extractelement <4 x i32> %v, i32 %idx1
  %y = extractelement <4 x i32> %v, i32 %idx2
  %z = select i1 %b, i32 %x, i32 %y
  ret i32 %z
}
```

```asm
f:                                      // @f
	.cfi_startproc
// %bb.0:
	sub	sp, sp, #16
	.cfi_def_cfa_offset 16
	mov	x8, sp
	mov	x9, sp
                                        // kill: def $w1 killed $w1 def $x1
                                        // kill: def $w0 killed $w0 def $x0
	str	q0, [sp]
	bfi	x9, x0, #2, #2
	bfi	x8, x1, #2, #2
	tst	w2, #0x1
	ldr	w9, [x9]
	ldr	w8, [x8]
	csel	w0, w9, w8, ne
	add	sp, sp, #16
	ret
```

An alternative is that we could concatenate the vectors with `concatenateVectors` and then do an extractelement.

https://github.com/llvm/llvm-project/pull/148817


More information about the llvm-commits mailing list