[llvm] [ISel] Commute FMUL and inserting zero into vector lane (PR #146096)

Fri Jun 27 08:40:36 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-aarch64

Author: Julian Nagele (juliannagele)

<details>
<summary>Changes</summary>

When inserting zero into a lane of a vector that is the result of a floating-point multiplication (FMUL), we can do the insertion earlier, i.e. into an operand of the FMUL instead.

---
Full diff: https://github.com/llvm/llvm-project/pull/146096.diff


2 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+32) 
- (modified) llvm/test/CodeGen/AArch64/arm64-vmul.ll (+10) 


``````````diff

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c8b1eafd35495..867833f67b822 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26268,11 +26268,43 @@ static SDValue removeRedundantInsertVectorElt(SDNode *N) {
   return ExtractVec;
 }
 
+static SDValue commuteInsertVectorEltFMul(SDNode *N, SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
+  SDValue InsertVec = N->getOperand(0);
+  SDValue InsertVal = N->getOperand(1);
+  SDValue InsertIdx = N->getOperand(2);
+
+  // Only handle constant 0 insertion...
+  if (!(isNullConstant(InsertVal) || isNullFPConstant(InsertVal)))
+    return SDValue();
+  // ... into the result of an FMUL.
+  if (InsertVec.getOpcode() != ISD::FMUL)
+    return SDValue();
+
+  // Insert into the operand of FMUL instead.
+  SDValue FMulOp = InsertVec.getOperand(0);
+
+  if (!InsertVec.hasOneUse() || !FMulOp.hasOneUse())
+    return SDValue();
+
+  SDValue InsertOp =
+      DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), FMulOp.getValueType(),
+                  FMulOp, InsertVal, InsertIdx);
+  SDValue FMul =
+      DAG.getNode(ISD::FMUL, SDLoc(InsertVec), InsertVec.getValueType(),
+                  InsertOp, InsertVec.getOperand(1));
+  DAG.ReplaceAllUsesWith(N, &FMul);
+  return FMul;
+}
+
 static SDValue
 performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
   if (SDValue Res = removeRedundantInsertVectorElt(N))
     return Res;
 
+  if (SDValue Res = commuteInsertVectorEltFMul(N, DCI.DAG))
+    return Res;
+
   return performPostLD1Combine(N, DCI, true);
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index 937a17ca6c1e0..1e33f81b9b835 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -1186,6 +1186,16 @@ define double @fmul_lane_d(double %A, <2 x double> %vec) nounwind {
   ret double %res
 }
 
+define <4 x float> @fmul_insert_zero(<4 x float> %A, <4 x float> %B) {
+; CHECK-LABEL: fmul_insert_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov.s v0[3], wzr
+; CHECK-NEXT:    fmul.4s v0, v0, v1
+; CHECK-NEXT:    ret
+  %mul = fmul <4 x float> %A, %B
+  %mul_set_lane = insertelement <4 x float> %mul, float 0.000000e+00, i64 3
+  ret <4 x float> %mul_set_lane
+}
 
 
 define <2 x float> @fmulx_lane_2s(<2 x float> %A, <2 x float> %B) nounwind {

``````````

</details>


https://github.com/llvm/llvm-project/pull/146096