[llvm] [VPlan] Don't fold live ins with both scalar and vector operands (PR #154067)

Sun Aug 17 23:25:19 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-vectorizers

Author: Luke Lau (lukel97)

<details>
<summary>Changes</summary>

If we end up with a extract_element VPInstruction where both operands are live-ins, we will try to fold the live-ins even though the first operand is vector and the live-in is scalar.

I don't think it's possible for us to fold these. We can't create a vector version of the live-in either because we don't know VF at this stage.

This removes the handling for opcodes that may have both scalar and vector operands. From some quick testing we previously never hit these folds anyway, and were probably just missing test coverage.

Fixes #154045


---
Full diff: https://github.com/llvm/llvm-project/pull/154067.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (-5) 
- (added) llvm/test/Transforms/LoopVectorize/X86/pr154045-dont-fold-extractelement-livein.ll (+75) 


``````````diff

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index de0c1e4d177bb..022d9b749f978 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -976,14 +976,9 @@ static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode,
                           RFlags.getGEPNoWrapFlags());
   }
   case VPInstruction::PtrAdd:
-  case VPInstruction::WidePtrAdd:
     return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
                           Ops[1],
                           cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
-  case Instruction::InsertElement:
-    return Folder.FoldInsertElement(Ops[0], Ops[1], Ops[2]);
-  case Instruction::ExtractElement:
-    return Folder.FoldExtractElement(Ops[0], Ops[1]);
   }
   return nullptr;
 }
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr154045-dont-fold-extractelement-livein.ll b/llvm/test/Transforms/LoopVectorize/X86/pr154045-dont-fold-extractelement-livein.ll
new file mode 100644
index 0000000000000..185071707b6e8
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr154045-dont-fold-extractelement-livein.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+
+; Make sure we don't try to fold a Instruction::ExtractElement ir<0>, ir<0>, ,
+; since we can't materialize the live-in for the vector operand.
+
+target triple = "x86_64"
+
+define void @test(ptr %p, i1 %c, i64 %x) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[P:%.*]], i1 [[C:%.*]], i64 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]]
+; CHECK:       [[PRED_SREM_IF]]:
+; CHECK-NEXT:    br label %[[PRED_SREM_CONTINUE]]
+; CHECK:       [[PRED_SREM_CONTINUE]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]]
+; CHECK:       [[PRED_SREM_IF1]]:
+; CHECK-NEXT:    br label %[[PRED_SREM_CONTINUE2]]
+; CHECK:       [[PRED_SREM_CONTINUE2]]:
+; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
+; CHECK-NEXT:    br i1 [[C]], label %[[LATCH]], label %[[ELSE:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[REM:%.*]] = srem i64 0, [[X]]
+; CHECK-NEXT:    br label %[[LATCH]]
+; CHECK:       [[LATCH]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ [[REM]], %[[ELSE]] ], [ 0, %[[LOOP]] ]
+; CHECK-NEXT:    [[PHI_TRUNC:%.*]] = trunc i64 [[PHI]] to i32
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[PHI_TRUNC]], 0
+; CHECK-NEXT:    store i32 [[SHL]], ptr [[P]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+  br i1 %c, label %latch, label %else
+
+else:
+  %rem = srem i64 0, %x
+  br label %latch
+
+latch:
+  %phi = phi i64 [ %rem, %else], [ 0, %loop ]
+  %phi.trunc = trunc i64 %phi to i32
+  %shl = shl i32 %phi.trunc, 0
+  store i32 %shl, ptr %p
+  %iv.next = add i64 %iv, 1
+  %exitcond = icmp eq i64 %iv, 1
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/154067