[llvm] [InstCombine] Fix phi scalarization with binop (PR #169120)
Meredith Julian via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 21 15:25:14 PST 2025
https://github.com/mjulian31 updated https://github.com/llvm/llvm-project/pull/169120
>From 91092805a840eddb14d5ae22ade026b3ddecebeb Mon Sep 17 00:00:00 2001
From: Meredith Julian <mjulian at nvidia.com>
Date: Fri, 21 Nov 2025 14:07:02 -0800
Subject: [PATCH 1/2] instcombine scalarization fix
---
.../InstCombine/InstCombineVectorOps.cpp | 11 +++--
.../Transforms/InstCombine/scalarization.ll | 45 +++++++++++++++++++
2 files changed, 53 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 18a45c6799bac..f58de35585123 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -140,8 +140,8 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
Value *Elt = EI.getIndexOperand();
// If the operand is the PHI induction variable:
if (PHIInVal == PHIUser) {
- // Scalarize the binary operation. Its first operand is the
- // scalar PHI, and the second operand is extracted from the other
+ // Scalarize the binary operation. One operand is the
+ // scalar PHI, and the other is extracted from the remaining
// vector operand.
BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
@@ -149,9 +149,14 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
ExtractElementInst::Create(B0->getOperand(opId), Elt,
B0->getOperand(opId)->getName() + ".Elt"),
B0->getIterator());
+ // Keep the binary operation's operand order to preserve semantics of
+ // non-commutative operations.
+ Value *FirstOp = (B0->getOperand(0) == PN) ? scalarPHI : Op;
+ Value *SecondOp = (B0->getOperand(0) == PN) ? Op : scalarPHI;
Value *newPHIUser = InsertNewInstWith(
BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
- scalarPHI, Op, B0), B0->getIterator());
+ FirstOp, SecondOp, B0),
+ B0->getIterator());
scalarPHI->addIncoming(newPHIUser, inBB);
} else {
// Scalarize PHI input:
diff --git a/llvm/test/Transforms/InstCombine/scalarization.ll b/llvm/test/Transforms/InstCombine/scalarization.ll
index a6931b4c41d2d..911bcc32eaa5b 100644
--- a/llvm/test/Transforms/InstCombine/scalarization.ll
+++ b/llvm/test/Transforms/InstCombine/scalarization.ll
@@ -108,6 +108,51 @@ for.end:
ret void
}
+define void @scalarize_phi_sub(ptr %n, ptr %inout) {
+;
+; CHECK-LABEL: @scalarize_phi_sub(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[T0:%.*]] = load volatile float, ptr [[INOUT:%.*]], align 4
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[TMP0:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[T1:%.*]] = load i32, ptr [[N:%.*]], align 4
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[T1]]
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK: for.body:
+; CHECK-NEXT: store volatile float [[TMP0]], ptr [[INOUT]], align 4
+; CHECK-NEXT: [[TMP1]] = fsub float 0.000000e+00, [[TMP0]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
+; CHECK-NEXT: br label [[FOR_COND]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %t0 = load volatile float, ptr %inout, align 4
+ %insert = insertelement <4 x float> undef, float %t0, i32 0
+ %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
+ %insert1 = insertelement <4 x float> undef, float 3.0, i32 0
+ br label %for.cond
+
+for.cond:
+ %x.0 = phi <4 x float> [ %splat, %entry ], [ %sub, %for.body ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %t1 = load i32, ptr %n, align 4
+ %cmp = icmp ne i32 %i.0, %t1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %t2 = extractelement <4 x float> %x.0, i32 1
+ store volatile float %t2, ptr %inout, align 4
+ %sub = fsub <4 x float> zeroinitializer, %x.0
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret void
+}
+
define float @extract_element_binop_splat_constant_index(<4 x float> %x) {
;
; CHECK-LABEL: @extract_element_binop_splat_constant_index(
>From 0113b250ebb4af79a1c5342cc423a49383f08921 Mon Sep 17 00:00:00 2001
From: Meredith Julian <mjulian at nvidia.com>
Date: Fri, 21 Nov 2025 15:24:32 -0800
Subject: [PATCH 2/2] fix format, fix undef in test
---
llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 8 ++++----
llvm/test/Transforms/InstCombine/scalarization.ll | 5 ++---
2 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index f58de35585123..98e2d9ebe4fc2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -153,10 +153,10 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
// non-commutative operations.
Value *FirstOp = (B0->getOperand(0) == PN) ? scalarPHI : Op;
Value *SecondOp = (B0->getOperand(0) == PN) ? Op : scalarPHI;
- Value *newPHIUser = InsertNewInstWith(
- BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
- FirstOp, SecondOp, B0),
- B0->getIterator());
+ Value *newPHIUser =
+ InsertNewInstWith(BinaryOperator::CreateWithCopiedFlags(
+ B0->getOpcode(), FirstOp, SecondOp, B0),
+ B0->getIterator());
scalarPHI->addIncoming(newPHIUser, inBB);
} else {
// Scalarize PHI input:
diff --git a/llvm/test/Transforms/InstCombine/scalarization.ll b/llvm/test/Transforms/InstCombine/scalarization.ll
index 911bcc32eaa5b..c4adf756f7756 100644
--- a/llvm/test/Transforms/InstCombine/scalarization.ll
+++ b/llvm/test/Transforms/InstCombine/scalarization.ll
@@ -130,9 +130,8 @@ define void @scalarize_phi_sub(ptr %n, ptr %inout) {
;
entry:
%t0 = load volatile float, ptr %inout, align 4
- %insert = insertelement <4 x float> undef, float %t0, i32 0
- %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
- %insert1 = insertelement <4 x float> undef, float 3.0, i32 0
+ %insert = insertelement <4 x float> poison, float %t0, i32 0
+ %splat = shufflevector <4 x float> %insert, <4 x float> poison, <4 x i32> zeroinitializer
br label %for.cond
for.cond:
More information about the llvm-commits mailing list