[llvm] IR: Add attribute negated (PR #121027)

Mon Dec 23 20:53:38 PST 2024

llvmbot wrote:



@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-llvm-selectiondag

Author: YunQiang Su (wzssyqa)

<details>
<summary>Changes</summary>

For floating-point operations, `c-a*b` and `c+(-a)*b` may produce different results under `upward` or `downward` rounding.
Currently we have only `fmuladd`, so Clang converts both of these cases into
```
%neg = fneg double %a
%0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %neg, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
``` 

Let's introduce the attribute `negated` for the case `c-a*b`, to mark that `%neg` was negated by the frontend rather than by the user code itself, so that we can lower `c-a*b` to `fmul` and `fsub`.

See: https://discourse.llvm.org/t/rfc-c-a-b-vs-a-b-c-for-strict-mode/83745/1

---
Full diff: https://github.com/llvm/llvm-project/pull/121027.diff


8 Files Affected:

- (modified) llvm/docs/LangRef.rst (+8) 
- (modified) llvm/include/llvm/Bitcode/LLVMBitCodes.h (+1) 
- (modified) llvm/include/llvm/IR/Attributes.td (+3) 
- (modified) llvm/lib/Bitcode/Writer/BitcodeWriter.cpp (+2) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+19-1) 
- (modified) llvm/lib/Transforms/Utils/CodeExtractor.cpp (+1) 
- (added) llvm/test/CodeGen/Mips/attribute-negated.ll (+54) 
- (added) llvm/test/CodeGen/X86/attribute-negated.ll (+72) 


``````````diff

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 7e01331b20c570..9e5f39530b1679 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -1573,6 +1573,14 @@ Currently, only the following parameter attributes are defined:
     | pinf  | Positive infinity    |       512     |
     +-------+----------------------+---------------+
 
+``negated``
+    The function parameter marked with this attribute is negated from
+    its opposite number by the frontend like Clang. The middle end or
+    backend should convert it back if possible. For example, `c-a*b`
+    is different with `c+(-a)*b`. Since we have only `fmuladd`,
+    this attribute on `a` is to mark that we are working on `c-a*b`.
+    So that we can convert `c+(-a)*b` to `fmsub` instruction
+    or `fmul`/`fsub`.
 
 ``alignstack(<n>)``
     This indicates the alignment that should be considered by the backend when
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 21fd27d9838db7..7e9d174db22026 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -788,6 +788,7 @@ enum AttributeKindCodes {
   ATTR_KIND_NO_EXT = 99,
   ATTR_KIND_NO_DIVERGENCE_SOURCE = 100,
   ATTR_KIND_SANITIZE_TYPE = 101,
+  ATTR_KIND_NEGATED = 102,
 };
 
 enum ComdatSelectionKindCodes {
diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td
index 61955cf883c3f1..baeca5d53f3c46 100644
--- a/llvm/include/llvm/IR/Attributes.td
+++ b/llvm/include/llvm/IR/Attributes.td
@@ -162,6 +162,9 @@ def Memory : IntAttr<"memory", IntersectCustom, [FnAttr]>;
 /// Forbidden floating-point classes.
 def NoFPClass : IntAttr<"nofpclass", IntersectCustom, [ParamAttr, RetAttr]>;
 
+/// Converted from the opposite number
+def Negated : EnumAttr<"negated", IntersectAnd, [ParamAttr, RetAttr]>;
+
 /// Function must be optimized for size first.
 def MinSize : EnumAttr<"minsize", IntersectPreserve, [FnAttr]>;
 
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index b4efd3928a2e6f..e87c9d2e13883d 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -755,6 +755,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
     return bitc::ATTR_KIND_MEMORY;
   case Attribute::NoFPClass:
     return bitc::ATTR_KIND_NOFPCLASS;
+  case Attribute::Negated:
+    return bitc::ATTR_KIND_NEGATED;
   case Attribute::Naked:
     return bitc::ATTR_KIND_NAKED;
   case Attribute::Nest:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index f8d7c3ef7bbe71..a9de7e25a4eb6a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8360,10 +8360,28 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
     // Break fmuladd into fmul and fadd.
     if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
         !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
+      auto PrevNode = FPI.getPrevNode();
+      bool convertToFMULSUB = false;
+      if (PrevNode && PrevNode->getOpcode() == Instruction::FNeg) {
+        if (PrevNode->getName() == FPI.getOperand(0)->getName() &&
+            FPI.getAttributes().getParamAttrs(0).hasAttribute(
+                Attribute::Negated)) {
+          Opers[1] = DAG.getNode(ISD::FNEG, sdl, VT, Opers[1]).getValue(0);
+          convertToFMULSUB = true;
+        } else if (PrevNode->getName() == FPI.getOperand(1)->getName() &&
+                   FPI.getAttributes().getParamAttrs(1).hasAttribute(
+                       Attribute::Negated)) {
+          Opers[2] = DAG.getNode(ISD::FNEG, sdl, VT, Opers[2]).getValue(0);
+          convertToFMULSUB = true;
+        }
+      }
       Opers.pop_back();
       SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
       pushOutChain(Mul, EB);
-      Opcode = ISD::STRICT_FADD;
+      if (convertToFMULSUB)
+        Opcode = ISD::STRICT_FSUB;
+      else
+        Opcode = ISD::STRICT_FADD;
       Opers.clear();
       Opers.push_back(Mul.getValue(1));
       Opers.push_back(Mul.getValue(0));
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 7ddb9e22c83441..4e1a8c560078aa 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -918,6 +918,7 @@ Function *CodeExtractor::constructFunctionDeclaration(
       case Attribute::PresplitCoroutine:
       case Attribute::Memory:
       case Attribute::NoFPClass:
+      case Attribute::Negated:
       case Attribute::CoroDestroyOnlyWhenComplete:
       case Attribute::CoroElideSafe:
       case Attribute::NoDivergenceSource:
diff --git a/llvm/test/CodeGen/Mips/attribute-negated.ll b/llvm/test/CodeGen/Mips/attribute-negated.ll
new file mode 100644
index 00000000000000..04f7257e446f9a
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/attribute-negated.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=mipsel -mattr=+fp64,+mips32r2 < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK
+
+define dso_local double @x1(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mul.d $f0, $f12, $f14
+; CHECK-NEXT:    ldc1 $f1, 16($sp)
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    sub.d $f0, $f0, $f1
+entry:
+  %neg = fneg double %a
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double negated %neg, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}
+define dso_local double @x2(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mul.d $f0, $f12, $f14
+; CHECK-NEXT:    ldc1 $f1, 16($sp)
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    sub.d $f0, $f0, $f1
+entry:
+  %neg = fneg double %b
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %a, double negated %neg, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}
+define dso_local double @x3(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    neg.d $f0, $f12
+; CHECK-NEXT:    mul.d $f0, $f0, $f14
+; CHECK-NEXT:    ldc1 $f1, 16($sp)
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    add.d $f0, $f0, $f1
+entry:
+  %neg = fneg double %a
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %neg, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}
+define dso_local double @x4(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    neg.d $f0, $f14
+; CHECK-NEXT:    mul.d $f0, $f12, $f0
+; CHECK-NEXT:    ldc1 $f1, 16($sp)
+; CHECK-NEXT:    jr $ra
+; CHECK-NEXT:    add.d $f0, $f0, $f1
+entry:
+  %neg = fneg double %b
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %a, double %neg, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}
diff --git a/llvm/test/CodeGen/X86/attribute-negated.ll b/llvm/test/CodeGen/X86/attribute-negated.ll
new file mode 100644
index 00000000000000..b54b4b6f3c8bab
--- /dev/null
+++ b/llvm/test/CodeGen/X86/attribute-negated.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64-- -mattr=-fma < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=x86_64-- -mattr=+fma < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK-FMA
+
+define dso_local double @x1(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mulsd %xmm1, %xmm0
+; CHECK-NEXT:    subsd %xmm2, %xmm0
+; CHECK-NEXT:    retq
+;
+; CHECK-FMA-LABEL: x1:
+; CHECK-FMA:       # %bb.0: # %entry
+; CHECK-FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; CHECK-FMA-NEXT:    retq
+entry:
+  %neg = fneg double %a
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double negated %neg, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}
+define dso_local double @x2(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mulsd %xmm1, %xmm0
+; CHECK-NEXT:    subsd %xmm2, %xmm0
+; CHECK-NEXT:    retq
+;
+; CHECK-FMA-LABEL: x2:
+; CHECK-FMA:       # %bb.0: # %entry
+; CHECK-FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; CHECK-FMA-NEXT:    retq
+entry:
+  %neg = fneg double %b
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %a, double negated %neg, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}
+define dso_local double @x3(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT:    mulsd %xmm1, %xmm0
+; CHECK-NEXT:    addsd %xmm2, %xmm0
+; CHECK-NEXT:    retq
+;
+; CHECK-FMA-LABEL: x3:
+; CHECK-FMA:       # %bb.0: # %entry
+; CHECK-FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; CHECK-FMA-NEXT:    retq
+entry:
+  %neg = fneg double %a
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %neg, double %b, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}
+define dso_local double @x4(double noundef %a, double noundef %b, double noundef %c) local_unnamed_addr #0 {
+; CHECK-LABEL: x4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT:    mulsd %xmm1, %xmm0
+; CHECK-NEXT:    addsd %xmm2, %xmm0
+; CHECK-NEXT:    retq
+;
+; CHECK-FMA-LABEL: x4:
+; CHECK-FMA:       # %bb.0: # %entry
+; CHECK-FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; CHECK-FMA-NEXT:    retq
+entry:
+  %neg = fneg double %b
+  %0 = tail call double @llvm.experimental.constrained.fmuladd.f64(double %a, double %neg, double %c, metadata !"round.dynamic", metadata !"fpexcept.ignore")
+  ret double %0
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/121027