[llvm] [GlobalISel] Add a fadd 0.0 combine with nsz (PR #153748)

Thu Aug 14 23:09:22 PDT 2025

https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/153748

This is surprisingly helpful, coming up a lot from fadd reductions.

>From ae029732781306fe6f89e24175a4df58ab0c925f Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 15 Aug 2025 07:07:03 +0100
Subject: [PATCH] [GlobalISel] Add a fadd 0.0 combine with nsz

---
 .../include/llvm/Target/GlobalISel/Combine.td |  9 ++++++-
 llvm/test/CodeGen/AArch64/fadd-combines.ll    | 24 +++++--------------
 llvm/test/CodeGen/AArch64/vecreduce-fadd.ll   | 15 ++++--------
 3 files changed, 18 insertions(+), 30 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index b619de39a8c75..a557743f684ca 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -560,6 +560,13 @@ def right_identity_neg_zero_fp: GICombineRule<
   (apply (GIReplaceReg $dst, $x))
 >;
 
+def right_identity_neg_zero_fp_nsz: GICombineRule<
+  (defs root:$dst),
+  (match (G_FADD $dst, $x, $y, (MIFlags FmNsz)):$root,
+    [{ return Helper.matchConstantFPOp(${y}, 0.0); }]),
+  (apply (GIReplaceReg $dst, $x))
+>;
+
 // Fold x op 1 -> x
 def right_identity_one_int: GICombineRule<
   (defs root:$dst),
@@ -2003,7 +2010,7 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
                                         trunc_buildvector_fold,
                                         trunc_lshr_buildvector_fold,
                                         bitcast_bitcast_fold, fptrunc_fpext_fold,
-                                        right_identity_neg_zero_fp,
+                                        right_identity_neg_zero_fp, right_identity_neg_zero_fp_nsz,
                                         right_identity_neg_one_fp]>;
 
 def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
diff --git a/llvm/test/CodeGen/AArch64/fadd-combines.ll b/llvm/test/CodeGen/AArch64/fadd-combines.ll
index 93196631bc0d3..76d29a50416e3 100644
--- a/llvm/test/CodeGen/AArch64/fadd-combines.ll
+++ b/llvm/test/CodeGen/AArch64/fadd-combines.ll
@@ -497,29 +497,17 @@ declare void @use(double)
 
 
 define float @faddvf32_zero_nsz(float %a) {
-; CHECK-SD-LABEL: faddvf32_zero_nsz:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: faddvf32_zero_nsz:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi d1, #0000000000000000
-; CHECK-GI-NEXT:    fadd s0, s0, s1
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: faddvf32_zero_nsz:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
   %b = fadd nsz float %a, 0.0
   ret float %b
 }
 
 define <2 x double> @faddv2f64_zero_nsz(<2 x double> %a) {
-; CHECK-SD-LABEL: faddv2f64_zero_nsz:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: faddv2f64_zero_nsz:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
-; CHECK-GI-NEXT:    fadd v0.2d, v0.2d, v1.2d
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: faddv2f64_zero_nsz:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
   %b = fadd nsz  <2 x double> %a, zeroinitializer
   ret <2 x double> %b
 }
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index 86da2dbf670fd..18f463cfcf7c9 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -209,17 +209,10 @@ define double @add_D(<2 x double> %bin.rdx)  {
 }
 
 define double @add_D_pos0(<2 x double> %bin.rdx)  {
-; CHECK-SD-LABEL: add_D_pos0:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    faddp d0, v0.2d
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-LABEL: add_D_pos0:
-; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi d1, #0000000000000000
-; CHECK-GI-NEXT:    faddp d0, v0.2d
-; CHECK-GI-NEXT:    fadd d0, d0, d1
-; CHECK-GI-NEXT:    ret
+; CHECK-LABEL: add_D_pos0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    faddp d0, v0.2d
+; CHECK-NEXT:    ret
   %r = call fast double @llvm.vector.reduce.fadd.f64.v2f64(double 0.0, <2 x double> %bin.rdx)
   ret double %r
 }