[llvm] [InstCombine] Avoid breaking FMA pattern when hoisting freeze (PR #141934)

via llvm-commits llvm-commits at lists.llvm.org
Thu May 29 05:13:48 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Harrison Hao (harrisonGPU)

<details>
<summary>Changes</summary>

Avoid pushing `freeze` into common FMA patterns like:

- $$(x \times y) + z \Rightarrow \text{fma}(x, y, z)$$  
- $$x + (y \times z) \Rightarrow \text{fma}(y, z, x)$$  
- $$(x \times y) - z \Rightarrow \text{fma}(x, y, -z)$$  
- $$x - (y \times z) \Rightarrow \text{fma}(-y, z, x)$$

These patterns are common in performance-sensitive code, and enabling FMA formation has an important impact on numerical precision and speed.

Closes: https://github.com/llvm/llvm-project/issues/141622

---
Full diff: https://github.com/llvm/llvm-project/pull/141934.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/InstCombine/InstructionCombining.cpp (+43) 
- (modified) llvm/test/Transforms/InstCombine/freeze.ll (+66) 


``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 24026e310ad11..2d7a69e788fff 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -4665,6 +4665,49 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
   if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
     return nullptr;
 
+  // Avoid pushing freeze into common FMA patterns. In these cases,
+  // adding a freeze will prevent later optimizations that recognize
+  // FMA candidates like:
+  //   (fmul x, y) + z    -> fma(x, y, z)
+  //   x + (fmul y, z)    -> fma(y, z, x)
+  //   (fmul x, y) - z    -> fma(x, y, -z)
+  //   x - (fmul y, z)    -> fma(-y, z, x)
+  //
+  // which is common in performance-critical code like matrix multiplications or
+  // numerical kernels.
+  if (auto *BinOp = dyn_cast<BinaryOperator>(OrigOp)) {
+    unsigned Opcode = BinOp->getOpcode();
+    if ((Opcode == Instruction::FAdd || Opcode == Instruction::FSub) &&
+        BinOp->hasAllowContract()) {
+      Value *A = BinOp->getOperand(0);
+      Value *B = BinOp->getOperand(1);
+
+      if (Opcode == Instruction::FAdd) {
+        // Support (x * y) + z -> fma(x, y, z)
+        if (isa<BinaryOperator>(A) &&
+            cast<BinaryOperator>(A)->getOpcode() == Instruction::FMul)
+          return nullptr;
+
+        // Support x + (y * z) -> fma(y, z, x)
+        if (isa<BinaryOperator>(B) &&
+            cast<BinaryOperator>(B)->getOpcode() == Instruction::FMul)
+          return nullptr;
+      }
+
+      if (Opcode == Instruction::FSub) {
+        // Support (x * y) - z -> fma(x, y, -z)
+        if (isa<BinaryOperator>(A) &&
+            cast<BinaryOperator>(A)->getOpcode() == Instruction::FMul)
+          return nullptr;
+
+        // Support x - (y * z) -> fma(-y, z, x)
+        if (isa<BinaryOperator>(B) &&
+            cast<BinaryOperator>(B)->getOpcode() == Instruction::FMul)
+          return nullptr;
+      }
+    }
+  }
+
   // We can't push the freeze through an instruction which can itself create
   // poison.  If the only source of new poison is flags, we can simply
   // strip them (since we know the only use is the freeze and nothing can
diff --git a/llvm/test/Transforms/InstCombine/freeze.ll b/llvm/test/Transforms/InstCombine/freeze.ll
index 8875ce1c566f3..1a778aebda29b 100644
--- a/llvm/test/Transforms/InstCombine/freeze.ll
+++ b/llvm/test/Transforms/InstCombine/freeze.ll
@@ -1195,6 +1195,72 @@ define i1 @propagate_drop_flags_icmp(i32 %a, i32 %b) {
 
 declare i32 @llvm.umax.i32(i32 %a, i32 %b)
 
+define i1 @propagate_drop_fma_mul_add_left(float %arg1, float %arg2) {
+; CHECK-LABEL: @propagate_drop_fma_mul_add_left(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I:%.*]] = fmul contract float [[ARG1:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT:    [[I1:%.*]] = fadd contract float [[I]], 1.000000e+00
+; CHECK-NEXT:    [[I1_FR:%.*]] = freeze float [[I1]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[I1_FR]], 0.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+bb:
+  %i = fmul contract float %arg1, %arg2
+  %i1 = fadd contract float %i, 1.0
+  %cmp = fcmp ogt float %i1, 0.0
+  %fr = freeze i1 %cmp
+  ret i1 %fr
+}
+
+define i1 @propagate_drop_fma_add_mul_right(float %arg1, float %arg2) {
+; CHECK-LABEL: @propagate_drop_fma_add_mul_right(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I:%.*]] = fmul contract float [[ARG1:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT:    [[I1:%.*]] = fadd contract float [[I]], 1.000000e+00
+; CHECK-NEXT:    [[I1_FR:%.*]] = freeze float [[I1]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[I1_FR]], 0.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+bb:
+  %i = fmul contract float %arg1, %arg2
+  %i1 = fadd contract float 1.0, %i
+  %cmp = fcmp ogt float %i1, 0.0
+  %fr = freeze i1 %cmp
+  ret i1 %fr
+}
+
+define i1 @propagate_drop_fma_mul_sub_left(float %arg1, float %arg2) {
+; CHECK-LABEL: @propagate_drop_fma_mul_sub_left(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I:%.*]] = fmul contract float [[ARG1:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT:    [[I1:%.*]] = fadd contract float [[I]], -1.000000e+00
+; CHECK-NEXT:    [[I1_FR:%.*]] = freeze float [[I1]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[I1_FR]], 0.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+bb:
+  %i = fmul contract float %arg1, %arg2
+  %i1 = fsub contract float %i, 1.0
+  %cmp = fcmp ogt float %i1, 0.0
+  %fr = freeze i1 %cmp
+  ret i1 %fr
+}
+
+define float @propagate_drop_fma_sub_mul_right(float %arg1, float %arg2) {
+; CHECK-LABEL: @propagate_drop_fma_sub_mul_right(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[I:%.*]] = fmul contract float [[ARG1:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT:    [[I1:%.*]] = fsub contract float 1.000000e+00, [[I]]
+; CHECK-NEXT:    [[FR:%.*]] = freeze float [[I1]]
+; CHECK-NEXT:    ret float [[FR]]
+;
+bb:
+  %i = fmul contract float %arg1, %arg2
+  %i1 = fsub contract float 1.0, %i
+  %fr = freeze float %i1
+  ret float %fr
+}
+
 define i32 @freeze_call_with_range_attr(i32 %a) {
 ; CHECK-LABEL: @freeze_call_with_range_attr(
 ; CHECK-NEXT:    [[Y:%.*]] = lshr i32 2047, [[A:%.*]]

``````````

</details>


https://github.com/llvm/llvm-project/pull/141934


More information about the llvm-commits mailing list