[llvm] [VPlan] Don't fold live ins with both scalar and vector operands (PR #154067)

Sun Aug 17 23:24:44 PDT 2025

https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/154067

If we end up with an extract_element VPInstruction where both operands are live-ins, we will try to fold the live-ins even though the first operand is a vector and the live-in is a scalar.

I don't think it's possible for us to fold these. We can't create a vector version of the live-in either because we don't know VF at this stage.

This removes the handling for opcodes that may have both scalar and vector operands. From some quick testing we previously never hit these folds anyway, and were probably just missing test coverage.

Fixes #154045


From 4c0ebae46ad61547cca5d65e365ebdb05f5a7401 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 18 Aug 2025 14:19:45 +0800
Subject: [PATCH] [VPlan] Don't fold live ins with both scalar and vector
 operands

If we end up with an extract_element VPInstruction where both operands are live-ins, we will try to fold the live-ins even though the first operand is a vector and the live-in is a scalar.

I don't think it's possible for us to fold these. We can't create a vector version of the live-in either because we don't know VF at this stage.

This removes the handling for opcodes that may have both scalar and vector operands. From some quick testing we previously never hit these folds anyway, and were probably just missing test coverage.

Fixes #154045
---
 .../Transforms/Vectorize/VPlanTransforms.cpp  |  5 --
 ...r154045-dont-fold-extractelement-livein.ll | 75 +++++++++++++++++++
 2 files changed, 75 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/X86/pr154045-dont-fold-extractelement-livein.ll

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index de0c1e4d177bb..022d9b749f978 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -976,14 +976,9 @@ static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode,
                           RFlags.getGEPNoWrapFlags());
   }
   case VPInstruction::PtrAdd:
-  case VPInstruction::WidePtrAdd:
     return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
                           Ops[1],
                           cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
-  case Instruction::InsertElement:
-    return Folder.FoldInsertElement(Ops[0], Ops[1], Ops[2]);
-  case Instruction::ExtractElement:
-    return Folder.FoldExtractElement(Ops[0], Ops[1]);
   }
   return nullptr;
 }
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr154045-dont-fold-extractelement-livein.ll b/llvm/test/Transforms/LoopVectorize/X86/pr154045-dont-fold-extractelement-livein.ll
new file mode 100644
index 0000000000000..185071707b6e8
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr154045-dont-fold-extractelement-livein.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+
+; Make sure we don't try to fold an Instruction::ExtractElement ir<0>, ir<0>,
+; since we can't materialize the live-in for the vector operand.
+
+target triple = "x86_64"
+
+define void @test(ptr %p, i1 %c, i64 %x) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[P:%.*]], i1 [[C:%.*]], i64 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]]
+; CHECK:       [[PRED_SREM_IF]]:
+; CHECK-NEXT:    br label %[[PRED_SREM_CONTINUE]]
+; CHECK:       [[PRED_SREM_CONTINUE]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]]
+; CHECK:       [[PRED_SREM_IF1]]:
+; CHECK-NEXT:    br label %[[PRED_SREM_CONTINUE2]]
+; CHECK:       [[PRED_SREM_CONTINUE2]]:
+; CHECK-NEXT:    store i32 0, ptr [[P]], align 4
+; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
+; CHECK-NEXT:    br i1 [[C]], label %[[LATCH]], label %[[ELSE:.*]]
+; CHECK:       [[ELSE]]:
+; CHECK-NEXT:    [[REM:%.*]] = srem i64 0, [[X]]
+; CHECK-NEXT:    br label %[[LATCH]]
+; CHECK:       [[LATCH]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ [[REM]], %[[ELSE]] ], [ 0, %[[LOOP]] ]
+; CHECK-NEXT:    [[PHI_TRUNC:%.*]] = trunc i64 [[PHI]] to i32
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[PHI_TRUNC]], 0
+; CHECK-NEXT:    store i32 [[SHL]], ptr [[P]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+  br i1 %c, label %latch, label %else
+
+else:
+  %rem = srem i64 0, %x
+  br label %latch
+
+latch:
+  %phi = phi i64 [ %rem, %else], [ 0, %loop ]
+  %phi.trunc = trunc i64 %phi to i32
+  %shl = shl i32 %phi.trunc, 0
+  store i32 %shl, ptr %p
+  %iv.next = add i64 %iv, 1
+  %exitcond = icmp eq i64 %iv, 1
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}