[llvm] [InstCombine] Fix phi scalarization with binop (PR #169120)

Meredith Julian via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 21 15:03:31 PST 2025


https://github.com/mjulian31 created https://github.com/llvm/llvm-project/pull/169120

InstCombine phi scalarization would always create a new binary op with the phi as the first operand, which is not correct for non-commutable binary ops such as sub. This fix preserves the original binary op ordering in the new binary op and adds a test for this behavior. Currently, this transformation can produce silently incorrect IR, and in the case of the added test, would optimize it out entirely.

>From 91092805a840eddb14d5ae22ade026b3ddecebeb Mon Sep 17 00:00:00 2001
From: Meredith Julian <mjulian at nvidia.com>
Date: Fri, 21 Nov 2025 14:07:02 -0800
Subject: [PATCH] instcombine scalarization fix

---
 .../InstCombine/InstCombineVectorOps.cpp      | 11 +++--
 .../Transforms/InstCombine/scalarization.ll   | 45 +++++++++++++++++++
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 18a45c6799bac..f58de35585123 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -140,8 +140,8 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
     Value *Elt = EI.getIndexOperand();
     // If the operand is the PHI induction variable:
     if (PHIInVal == PHIUser) {
-      // Scalarize the binary operation. Its first operand is the
-      // scalar PHI, and the second operand is extracted from the other
+      // Scalarize the binary operation. One operand is the
+      // scalar PHI, and the other is extracted from the other
       // vector operand.
       BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
       unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
@@ -149,9 +149,14 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
           ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                      B0->getOperand(opId)->getName() + ".Elt"),
           B0->getIterator());
+      // Preserve operand order for binary operation to preserve semantics of
+      // non-commutative operations.
+      Value *FirstOp = (B0->getOperand(0) == PN) ? scalarPHI : Op;
+      Value *SecondOp = (B0->getOperand(0) == PN) ? Op : scalarPHI;
       Value *newPHIUser = InsertNewInstWith(
           BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
-                                                scalarPHI, Op, B0), B0->getIterator());
+                                                FirstOp, SecondOp, B0),
+          B0->getIterator());
       scalarPHI->addIncoming(newPHIUser, inBB);
     } else {
       // Scalarize PHI input:
diff --git a/llvm/test/Transforms/InstCombine/scalarization.ll b/llvm/test/Transforms/InstCombine/scalarization.ll
index a6931b4c41d2d..911bcc32eaa5b 100644
--- a/llvm/test/Transforms/InstCombine/scalarization.ll
+++ b/llvm/test/Transforms/InstCombine/scalarization.ll
@@ -108,6 +108,51 @@ for.end:
   ret void
 }
 
+define void @scalarize_phi_sub(ptr %n, ptr %inout) {
+;
+; CHECK-LABEL: @scalarize_phi_sub(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T0:%.*]] = load volatile float, ptr [[INOUT:%.*]], align 4
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[T1:%.*]] = load i32, ptr [[N:%.*]], align 4
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[T1]]
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK:       for.body:
+; CHECK-NEXT:    store volatile float [[TMP0]], ptr [[INOUT]], align 4
+; CHECK-NEXT:    [[TMP1]] = fsub float 0.000000e+00, [[TMP0]]
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label [[FOR_COND]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %t0 = load volatile float, ptr %inout, align 4
+  %insert = insertelement <4 x float> undef, float %t0, i32 0
+  %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
+  %insert1 = insertelement <4 x float> undef, float 3.0, i32 0
+  br label %for.cond
+
+for.cond:
+  %x.0 = phi <4 x float> [ %splat, %entry ], [ %sub, %for.body ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %t1 = load i32, ptr %n, align 4
+  %cmp = icmp ne i32 %i.0, %t1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  %t2 = extractelement <4 x float> %x.0, i32 1
+  store volatile float %t2, ptr %inout, align 4
+  %sub = fsub <4 x float> zeroinitializer, %x.0
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret void
+}
+
 define float @extract_element_binop_splat_constant_index(<4 x float> %x) {
 ;
 ; CHECK-LABEL: @extract_element_binop_splat_constant_index(



More information about the llvm-commits mailing list