[llvm] 144736b - [VPlan] Don't fold live ins with both scalar and vector operands (#154067)

Mon Aug 18 21:10:57 PDT 2025

Author: Luke Lau
Date: 2025-08-19T04:10:53Z
New Revision: 144736b07e03f27888860f6ad7aa13a77f7c9731

URL: https://github.com/llvm/llvm-project/commit/144736b07e03f27888860f6ad7aa13a77f7c9731
DIFF: https://github.com/llvm/llvm-project/commit/144736b07e03f27888860f6ad7aa13a77f7c9731.diff

LOG: [VPlan] Don't fold live ins with both scalar and vector operands (#154067)

If we end up with an extract_element VPInstruction where both operands
are live-ins, we will try to fold the live-ins even though the first
operand is conceptually a vector whilst its live-in IR value is a scalar.

This fixes it by just returning the scalar live-in used as the vector
operand (an extract of a live-in is an extract of a broadcast) instead of
calling the folder, and removes the handling for insertelement where we
aren't able to do the fold. From some quick testing we previously never hit
the insertelement fold anyway, and were probably just missing test coverage.

Fixes #154045

Added: 
    llvm/test/Transforms/LoopVectorize/pr154045-dont-fold-extractelement-livein.ll

Modified: 
    llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 81088c9a81392..45d9ee1753c3a 100644

--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -980,10 +980,11 @@ static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode,
     return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
                           Ops[1],
                           cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
-  case Instruction::InsertElement:
-    return Folder.FoldInsertElement(Ops[0], Ops[1], Ops[2]);
+  // An extract of a live-in is an extract of a broadcast, so return the
+  // broadcasted element.
   case Instruction::ExtractElement:
-    return Folder.FoldExtractElement(Ops[0], Ops[1]);
+    assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar");
+    return Ops[0];
   }
   return nullptr;
 }

diff  --git a/llvm/test/Transforms/LoopVectorize/pr154045-dont-fold-extractelement-livein.ll b/llvm/test/Transforms/LoopVectorize/pr154045-dont-fold-extractelement-livein.ll
new file mode 100644
index 0000000000000..39baa27c8a4ff
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/pr154045-dont-fold-extractelement-livein.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -force-vector-width=2 -force-widen-divrem-via-safe-divisor=false -S %s | FileCheck %s
+
+; Make sure we don't try to fold a Instruction::ExtractElement ir<0>, ir<0>,
+; since we can't materialize the live-in for the vector operand.
+
+define void @pr154045(ptr %p, i1 %c, i64 %x) {
+; CHECK-LABEL: define void @pr154045(
+; CHECK-SAME: ptr [[P:%.*]], i1 [[C:%.*]], i64 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]]
+; CHECK:       [[PRED_SREM_IF]]:
+; CHECK-NEXT:    br label %[[PRED_SREM_CONTINUE]]
+; CHECK:       [[PRED_SREM_CONTINUE]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]]
+; CHECK:       [[PRED_SREM_IF1]]:
+; CHECK-NEXT:    br label %[[PRED_SREM_CONTINUE2]]
+; CHECK:       [[PRED_SREM_CONTINUE2]]:
+; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
+; CHECK-NEXT:    br i1 [[C]], label %[[LATCH]], label %[[ELSE:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[REM:%.*]] = srem i64 0, [[X]]
+; CHECK-NEXT:    br label %[[LATCH]]
+; CHECK:       [[LATCH]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ [[REM]], %[[ELSE]] ], [ 0, %[[LOOP]] ]
+; CHECK-NEXT:    [[PHI_TRUNC:%.*]] = trunc i64 [[PHI]] to i32
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[PHI_TRUNC]], 0
+; CHECK-NEXT:    store i32 [[SHL]], ptr [[P]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+  br i1 %c, label %latch, label %else
+
+else:
+  %rem = srem i64 0, %x
+  br label %latch
+
+latch:
+  %phi = phi i64 [ %rem, %else ], [ 0, %loop ]
+  %phi.trunc = trunc i64 %phi to i32
+  %shl = shl i32 %phi.trunc, 0
+  store i32 %shl, ptr %p
+  %iv.next = add i64 %iv, 1
+  %exitcond = icmp eq i64 %iv, 1
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}