[clang] [llvm] Clang/buildFMulAdd: Use negated attribute (PR #121038)

Tue Dec 24 01:02:09 PST 2024

https://github.com/wzssyqa created https://github.com/llvm/llvm-project/pull/121038

Use the `negated` attribute when negMul or negAdd is set, so that we can
lower fneg+fmuladd to fmul+fsub if needed.
    
1) It can save one machine instruction:
    fneg/fmul/fadd vs fmul/fsub
2) In strict mode, `c-a*b` may differ from `c+(-a)*b`.

>From c474a905f473d0844caf6ad7dd7b6ff338d01d34 Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Tue, 24 Dec 2024 04:16:16 +0000
Subject: [PATCH 1/6] Support attribute negated

---
 llvm/docs/LangRef.rst                         |  8 ++++++++
 llvm/include/llvm/Bitcode/LLVMBitCodes.h      |  1 +
 llvm/include/llvm/IR/Attributes.td            |  3 +++
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp     |  2 ++
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 20 ++++++++++++++++++-
 llvm/lib/Transforms/Utils/CodeExtractor.cpp   |  1 +
 6 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 7e01331b20c570..9e5f39530b1679 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -1573,6 +1573,14 @@ Currently, only the following parameter attributes are defined:
     | pinf  | Positive infinity    |       512     |
     +-------+----------------------+---------------+
 
+``negated``
+    The function parameter marked with this attribute is negated from
+    its opposite number by the frontend like Clang. The middle end or
+    backend should convert it back if possible. For example, `c-a*b`
+    is different with `c+(-a)*b`. Since we have only `fmuladd`,
+    this attribute on `a` is to mark that we are working on `c-a*b`.
+    So that we can convert `c+(-a)*b` to `fmsub` instruction
+    or `fmul`/`fsub`.
 
 ``alignstack(<n>)``
     This indicates the alignment that should be considered by the backend when
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 21fd27d9838db7..7e9d174db22026 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -788,6 +788,7 @@ enum AttributeKindCodes {
   ATTR_KIND_NO_EXT = 99,
   ATTR_KIND_NO_DIVERGENCE_SOURCE = 100,
   ATTR_KIND_SANITIZE_TYPE = 101,
+  ATTR_KIND_NEGATED = 102,
 };
 
 enum ComdatSelectionKindCodes {
diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td
index 61955cf883c3f1..baeca5d53f3c46 100644
--- a/llvm/include/llvm/IR/Attributes.td
+++ b/llvm/include/llvm/IR/Attributes.td
@@ -162,6 +162,9 @@ def Memory : IntAttr<"memory", IntersectCustom, [FnAttr]>;
 /// Forbidden floating-point classes.
 def NoFPClass : IntAttr<"nofpclass", IntersectCustom, [ParamAttr, RetAttr]>;
 
+/// Converted from the opposite number
+def Negated : EnumAttr<"negated", IntersectAnd, [ParamAttr, RetAttr]>;
+
 /// Function must be optimized for size first.
 def MinSize : EnumAttr<"minsize", IntersectPreserve, [FnAttr]>;
 
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index b4efd3928a2e6f..e87c9d2e13883d 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -755,6 +755,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
     return bitc::ATTR_KIND_MEMORY;
   case Attribute::NoFPClass:
     return bitc::ATTR_KIND_NOFPCLASS;
+  case Attribute::Negated:
+    return bitc::ATTR_KIND_NEGATED;
   case Attribute::Naked:
     return bitc::ATTR_KIND_NAKED;
   case Attribute::Nest:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index f8d7c3ef7bbe71..a9de7e25a4eb6a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8360,10 +8360,28 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
     // Break fmuladd into fmul and fadd.
     if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
         !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
+      auto PrevNode = FPI.getPrevNode();
+      bool convertToFMULSUB = false;
+      if (PrevNode && PrevNode->getOpcode() == Instruction::FNeg) {
+        if (PrevNode->getName() == FPI.getOperand(0)->getName() &&
+            FPI.getAttributes().getParamAttrs(0).hasAttribute(
+                Attribute::Negated)) {
+          Opers[1] = DAG.getNode(ISD::FNEG, sdl, VT, Opers[1]).getValue(0);
+          convertToFMULSUB = true;
+        } else if (PrevNode->getName() == FPI.getOperand(1)->getName() &&
+                   FPI.getAttributes().getParamAttrs(1).hasAttribute(
+                       Attribute::Negated)) {
+          Opers[2] = DAG.getNode(ISD::FNEG, sdl, VT, Opers[2]).getValue(0);
+          convertToFMULSUB = true;
+        }
+      }
       Opers.pop_back();
       SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
       pushOutChain(Mul, EB);
-      Opcode = ISD::STRICT_FADD;
+      if (convertToFMULSUB)
+        Opcode = ISD::STRICT_FSUB;
+      else
+        Opcode = ISD::STRICT_FADD;
       Opers.clear();
       Opers.push_back(Mul.getValue(1));
       Opers.push_back(Mul.getValue(0));
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 7ddb9e22c83441..4e1a8c560078aa 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -918,6 +918,7 @@ Function *CodeExtractor::constructFunctionDeclaration(
       case Attribute::PresplitCoroutine:
       case Attribute::Memory:
       case Attribute::NoFPClass:
+      case Attribute::Negated:
       case Attribute::CoroDestroyOnlyWhenComplete:
       case Attribute::CoroElideSafe:
       case Attribute::NoDivergenceSource:

>From 73c85b212be0b2950e27ea3fdd4b83cc51714e76 Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Tue, 24 Dec 2024 12:44:22 +0800
Subject: [PATCH 2/6] Add testcase

---
 llvm/test/CodeGen/Mips/attribute-negated.ll | 54 ++++++++++++++++
 llvm/test/CodeGen/X86/attribute-negated.ll  | 72 +++++++++++++++++++++
 2 files changed, 126 insertions(+)
 create mode 100644 llvm/test/CodeGen/Mips/attribute-negated.ll
 create mode 100644 llvm/test/CodeGen/X86/attribute-negated.ll

diff --git a/llvm/test/CodeGen/Mips/attribute-negated.ll b/llvm/test/CodeGen/Mips/attribute-negated.ll
new file mode 100644
index 00000000000000..04f7257e446f9a
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/attribute-negated.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=mipsel -mattr=+fp64,+mips32r2 < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK
+
+define dso_local double @x1(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mul.d $f0, $f12, $f14
+; CHECK-NEXT:    ldc1 $f1, 16($sp)
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    sub.d $f0, $f0, $f1
+entry:
+  %neg = fneg double %a
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double negated %neg, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}
+define dso_local double @x2(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mul.d $f0, $f12, $f14
+; CHECK-NEXT:    ldc1 $f1, 16($sp)
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    sub.d $f0, $f0, $f1
+entry:
+  %neg = fneg double %b
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %a, double negated %neg, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}
+define dso_local double @x3(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    neg.d $f0, $f12
+; CHECK-NEXT:    mul.d $f0, $f0, $f14
+; CHECK-NEXT:    ldc1 $f1, 16($sp)
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    add.d $f0, $f0, $f1
+entry:
+  %neg = fneg double %a
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %neg, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}
+define dso_local double @x4(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    neg.d $f0, $f14
+; CHECK-NEXT:    mul.d $f0, $f12, $f0
+; CHECK-NEXT:    ldc1 $f1, 16($sp)
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    add.d $f0, $f0, $f1
+entry:
+  %neg = fneg double %b
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %a, double %neg, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}
diff --git a/llvm/test/CodeGen/X86/attribute-negated.ll b/llvm/test/CodeGen/X86/attribute-negated.ll
new file mode 100644
index 00000000000000..b54b4b6f3c8bab
--- /dev/null
+++ b/llvm/test/CodeGen/X86/attribute-negated.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64-- -mattr=-fma < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=x86_64-- -mattr=+fma < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK-FMA
+
+define dso_local double @x1(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mulsd %xmm1, %xmm0
+; CHECK-NEXT:    subsd %xmm2, %xmm0
+; CHECK-NEXT:    retq
+;
+; CHECK-FMA-LABEL: x1:
+; CHECK-FMA:       # %bb.0: # %entry
+; CHECK-FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; CHECK-FMA-NEXT:    retq
+entry:
+  %neg = fneg double %a
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double negated %neg, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}
+define dso_local double @x2(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mulsd %xmm1, %xmm0
+; CHECK-NEXT:    subsd %xmm2, %xmm0
+; CHECK-NEXT:    retq
+;
+; CHECK-FMA-LABEL: x2:
+; CHECK-FMA:       # %bb.0: # %entry
+; CHECK-FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; CHECK-FMA-NEXT:    retq
+entry:
+  %neg = fneg double %b
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %a, double negated %neg, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}
+define dso_local double @x3(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    mulsd %xmm1, %xmm0
+; CHECK-NEXT:    addsd %xmm2, %xmm0
+; CHECK-NEXT:    retq
+;
+; CHECK-FMA-LABEL: x3:
+; CHECK-FMA:       # %bb.0: # %entry
+; CHECK-FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; CHECK-FMA-NEXT:    retq
+entry:
+  %neg = fneg double %a
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %neg, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}
+define dso_local double @x4(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT:    mulsd %xmm1, %xmm0
+; CHECK-NEXT:    addsd %xmm2, %xmm0
+; CHECK-NEXT:    retq
+;
+; CHECK-FMA-LABEL: x4:
+; CHECK-FMA:       # %bb.0: # %entry
+; CHECK-FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; CHECK-FMA-NEXT:    retq
+entry:
+  %neg = fneg double %b
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %a, double %neg, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}

>From e4d042f22c83e056a9c6618969b2caa0726d50a7 Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Tue, 24 Dec 2024 08:17:45 +0000
Subject: [PATCH 3/6] Revert fmuladd support

---
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 20 +------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a9de7e25a4eb6a..f8d7c3ef7bbe71 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8360,28 +8360,10 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
     // Break fmuladd into fmul and fadd.
     if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
         !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
-      auto PrevNode = FPI.getPrevNode();
-      bool convertToFMULSUB = false;
-      if (PrevNode && PrevNode->getOpcode() == Instruction::FNeg) {
-        if (PrevNode->getName() == FPI.getOperand(0)->getName() &&
-            FPI.getAttributes().getParamAttrs(0).hasAttribute(
-                Attribute::Negated)) {
-          Opers[1] = DAG.getNode(ISD::FNEG, sdl, VT, Opers[1]).getValue(0);
-          convertToFMULSUB = true;
-        } else if (PrevNode->getName() == FPI.getOperand(1)->getName() &&
-                   FPI.getAttributes().getParamAttrs(1).hasAttribute(
-                       Attribute::Negated)) {
-          Opers[2] = DAG.getNode(ISD::FNEG, sdl, VT, Opers[2]).getValue(0);
-          convertToFMULSUB = true;
-        }
-      }
       Opers.pop_back();
       SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
       pushOutChain(Mul, EB);
-      if (convertToFMULSUB)
-        Opcode = ISD::STRICT_FSUB;
-      else
-        Opcode = ISD::STRICT_FADD;
+      Opcode = ISD::STRICT_FADD;
       Opers.clear();
       Opers.push_back(Mul.getValue(1));
       Opers.push_back(Mul.getValue(0));

>From 803d99dcf269b336af72c9878bdea1de94e20c58 Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Tue, 24 Dec 2024 08:17:58 +0000
Subject: [PATCH 4/6] Revert "Add testcase"

This reverts commit 67789aad898c90576a5c591f85ebc9ac33db8615.
---
 llvm/test/CodeGen/Mips/attribute-negated.ll | 54 ----------------
 llvm/test/CodeGen/X86/attribute-negated.ll  | 72 ---------------------
 2 files changed, 126 deletions(-)
 delete mode 100644 llvm/test/CodeGen/Mips/attribute-negated.ll
 delete mode 100644 llvm/test/CodeGen/X86/attribute-negated.ll

diff --git a/llvm/test/CodeGen/Mips/attribute-negated.ll b/llvm/test/CodeGen/Mips/attribute-negated.ll
deleted file mode 100644
index 04f7257e446f9a..00000000000000
--- a/llvm/test/CodeGen/Mips/attribute-negated.ll
+++ /dev/null
@@ -1,54 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=mipsel -mattr=+fp64,+mips32r2 < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK
-
-define dso_local double @x1(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
-; CHECK-LABEL: x1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mul.d $f0, $f12, $f14
-; CHECK-NEXT:    ldc1 $f1, 16($sp)
-; CHECK-NEXT:    jr $ra
-; CHECK-NEXT:    sub.d $f0, $f0, $f1
-entry:
-  %neg = fneg double %a
-  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double negated %neg, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
-  ret double %0
-}
-define dso_local double @x2(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
-; CHECK-LABEL: x2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mul.d $f0, $f12, $f14
-; CHECK-NEXT:    ldc1 $f1, 16($sp)
-; CHECK-NEXT:    jr $ra
-; CHECK-NEXT:    sub.d $f0, $f0, $f1
-entry:
-  %neg = fneg double %b
-  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %a, double negated %neg, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
-  ret double %0
-}
-define dso_local double @x3(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
-; CHECK-LABEL: x3:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    neg.d $f0, $f12
-; CHECK-NEXT:    mul.d $f0, $f0, $f14
-; CHECK-NEXT:    ldc1 $f1, 16($sp)
-; CHECK-NEXT:    jr $ra
-; CHECK-NEXT:    add.d $f0, $f0, $f1
-entry:
-  %neg = fneg double %a
-  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %neg, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
-  ret double %0
-}
-define dso_local double @x4(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
-; CHECK-LABEL: x4:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    neg.d $f0, $f14
-; CHECK-NEXT:    mul.d $f0, $f12, $f0
-; CHECK-NEXT:    ldc1 $f1, 16($sp)
-; CHECK-NEXT:    jr $ra
-; CHECK-NEXT:    add.d $f0, $f0, $f1
-entry:
-  %neg = fneg double %b
-  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %a, double %neg, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
-  ret double %0
-}
diff --git a/llvm/test/CodeGen/X86/attribute-negated.ll b/llvm/test/CodeGen/X86/attribute-negated.ll
deleted file mode 100644
index b54b4b6f3c8bab..00000000000000
--- a/llvm/test/CodeGen/X86/attribute-negated.ll
+++ /dev/null
@@ -1,72 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=x86_64-- -mattr=-fma < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple=x86_64-- -mattr=+fma < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK-FMA
-
-define dso_local double @x1(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
-; CHECK-LABEL: x1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mulsd %xmm1, %xmm0
-; CHECK-NEXT:    subsd %xmm2, %xmm0
-; CHECK-NEXT:    retq
-;
-; CHECK-FMA-LABEL: x1:
-; CHECK-FMA:       # %bb.0: # %entry
-; CHECK-FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
-; CHECK-FMA-NEXT:    retq
-entry:
-  %neg = fneg double %a
-  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double negated %neg, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
-  ret double %0
-}
-define dso_local double @x2(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
-; CHECK-LABEL: x2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mulsd %xmm1, %xmm0
-; CHECK-NEXT:    subsd %xmm2, %xmm0
-; CHECK-NEXT:    retq
-;
-; CHECK-FMA-LABEL: x2:
-; CHECK-FMA:       # %bb.0: # %entry
-; CHECK-FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
-; CHECK-FMA-NEXT:    retq
-entry:
-  %neg = fneg double %b
-  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %a, double negated %neg, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
-  ret double %0
-}
-define dso_local double @x3(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
-; CHECK-LABEL: x3:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT:    mulsd %xmm1, %xmm0
-; CHECK-NEXT:    addsd %xmm2, %xmm0
-; CHECK-NEXT:    retq
-;
-; CHECK-FMA-LABEL: x3:
-; CHECK-FMA:       # %bb.0: # %entry
-; CHECK-FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
-; CHECK-FMA-NEXT:    retq
-entry:
-  %neg = fneg double %a
-  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %neg, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
-  ret double %0
-}
-define dso_local double @x4(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
-; CHECK-LABEL: x4:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; CHECK-NEXT:    mulsd %xmm1, %xmm0
-; CHECK-NEXT:    addsd %xmm2, %xmm0
-; CHECK-NEXT:    retq
-;
-; CHECK-FMA-LABEL: x4:
-; CHECK-FMA:       # %bb.0: # %entry
-; CHECK-FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
-; CHECK-FMA-NEXT:    retq
-entry:
-  %neg = fneg double %b
-  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %a, double %neg, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
-  ret double %0
-}

>From 31782fb96fc283fdd2ea177078b9d0b2920dc38f Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Tue, 24 Dec 2024 08:21:28 +0000
Subject: [PATCH 5/6] improve LangRef

---
 llvm/docs/LangRef.rst | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 9e5f39530b1679..bf37e6a788c4b6 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -1576,11 +1576,9 @@ Currently, only the following parameter attributes are defined:
 ``negated``
     The function parameter marked with this attribute is negated from
     its opposite number by the frontend like Clang. The middle end or
-    backend should convert it back if possible. For example, `c-a*b`
-    is different with `c+(-a)*b`. Since we have only `fmuladd`,
-    this attribute on `a` is to mark that we are working on `c-a*b`.
-    So that we can convert `c+(-a)*b` to `fmsub` instruction
-    or `fmul`/`fsub`.
+    backend should convert it back if possible. For example, if -(a*b)
+    is converted to (-a)*b, the arg0 of the `fmul` instruction should be
+    marked with the `negated` attribute.
 
 ``alignstack(<n>)``
     This indicates the alignment that should be considered by the backend when

>From 9a8925b18e609ac646b2c16da81264a261545513 Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Tue, 24 Dec 2024 08:14:22 +0000
Subject: [PATCH 6/6] Clang/buildFMulAdd: Use negated attribute

Use the `negated` attribute when negMul or negAdd is set, so that we can
lower fneg+fmuladd to fmul+fsub if needed.

1) It can save one machine instruction:
   fneg/fmul/fadd vs fmul/fsub
2) In strict mode, `c-a*b` may differ from `c+(-a)*b`.
---
 clang/lib/CodeGen/CGExprScalar.cpp             | 4 ++++
 clang/test/CodeGen/constrained-math-builtins.c | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 4b71bd730ce12c..14d73de055d8ec 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -4120,6 +4120,10 @@ static Value* buildFMulAdd(llvm::Instruction *MulOp, Value *Addend,
         CGF.CGM.getIntrinsic(llvm::Intrinsic::experimental_constrained_fmuladd,
                              Addend->getType()),
         {MulOp0, MulOp1, Addend});
+    if (negMul)
+      dyn_cast<llvm::CallBase>(FMulAdd)->addParamAttr(0, llvm::Attribute::Negated);
+    if (negAdd)
+      dyn_cast<llvm::CallBase>(FMulAdd)->addParamAttr(2, llvm::Attribute::Negated);
   } else {
     FMulAdd = Builder.CreateCall(
         CGF.CGM.getIntrinsic(llvm::Intrinsic::fmuladd, Addend->getType()),
diff --git a/clang/test/CodeGen/constrained-math-builtins.c b/clang/test/CodeGen/constrained-math-builtins.c
index 68b9e75283c547..f044f15e98918b 100644
--- a/clang/test/CodeGen/constrained-math-builtins.c
+++ b/clang/test/CodeGen/constrained-math-builtins.c
@@ -392,12 +392,12 @@ void bar(float f) {
 
   // CHECK: call float @llvm.experimental.constrained.fmuladd.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
   // CHECK: fneg
-  // CHECK: call double @llvm.experimental.constrained.fmuladd.f64(double %{{.*}}, double %{{.*}}, double %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK: call double @llvm.experimental.constrained.fmuladd.f64(double %{{.*}}, double %{{.*}}, double negated %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
   // CHECK: fneg
   // CHECK: call x86_fp80 @llvm.experimental.constrained.fmuladd.f80(x86_fp80 %{{.*}}, x86_fp80 %{{.*}}, x86_fp80 %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
   // CHECK: fneg
   // CHECK: fneg
-  // CHECK: call float @llvm.experimental.constrained.fmuladd.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK: call float @llvm.experimental.constrained.fmuladd.f32(float negated %{{.*}}, float %{{.*}}, float negated %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
   // CHECK: fneg
-  // CHECK: call float @llvm.experimental.constrained.fmuladd.f32(float %{{.*}}, float %{{.*}}, float %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK: call float @llvm.experimental.constrained.fmuladd.f32(float negated %{{.*}}, float %{{.*}}, float %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
 };