[llvm] [InstCombine] Avoid breaking FMA pattern when hoisting freeze (PR #141934)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 29 05:13:48 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Harrison Hao (harrisonGPU)
<details>
<summary>Changes</summary>
Avoid pushing `freeze` into common FMA patterns like:
- $$(x \times y) + z \Rightarrow \text{fma}(x, y, z)$$
- $$x + (y \times z) \Rightarrow \text{fma}(y, z, x)$$
- $$(x \times y) - z \Rightarrow \text{fma}(x, y, -z)$$
- $$x - (y \times z) \Rightarrow \text{fma}(-y, z, x)$$
These patterns are common in performance-sensitive code, and enabling FMA formation has an important impact on numerical precision and speed.
Closes: https://github.com/llvm/llvm-project/issues/141622
---
Full diff: https://github.com/llvm/llvm-project/pull/141934.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstructionCombining.cpp (+43)
- (modified) llvm/test/Transforms/InstCombine/freeze.ll (+66)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 24026e310ad11..2d7a69e788fff 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -4665,6 +4665,49 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
return nullptr;
+ // Avoid pushing freeze into common FMA patterns. In these cases,
+ // adding a freeze will prevent later optimizations that recognize
+ // FMA candidates like:
+ // (fmul x, y) + z -> fma(x, y, z)
+ // x + (fmul y, z) -> fma(y, z, x)
+ // (fmul x, y) - z -> fma(x, y, -z)
+ // x - (fmul y, z) -> fma(-y, z, x)
+ //
+ // which is common in performance-critical code like matrix multiplications or
+ // numerical kernels.
+ if (auto *BinOp = dyn_cast<BinaryOperator>(OrigOp)) {
+ unsigned Opcode = BinOp->getOpcode();
+ if ((Opcode == Instruction::FAdd || Opcode == Instruction::FSub) &&
+ BinOp->hasAllowContract()) {
+ Value *A = BinOp->getOperand(0);
+ Value *B = BinOp->getOperand(1);
+
+ if (Opcode == Instruction::FAdd) {
+ // Support (x * y) + z -> fma(x, y, z)
+ if (isa<BinaryOperator>(A) &&
+ cast<BinaryOperator>(A)->getOpcode() == Instruction::FMul)
+ return nullptr;
+
+ // Support x + (y * z) -> fma(y, z, x)
+ if (isa<BinaryOperator>(B) &&
+ cast<BinaryOperator>(B)->getOpcode() == Instruction::FMul)
+ return nullptr;
+ }
+
+ if (Opcode == Instruction::FSub) {
+ // Support (x * y) - z -> fma(x, y, -z)
+ if (isa<BinaryOperator>(A) &&
+ cast<BinaryOperator>(A)->getOpcode() == Instruction::FMul)
+ return nullptr;
+
+ // Support x - (y * z) -> fma(-y, z, x)
+ if (isa<BinaryOperator>(B) &&
+ cast<BinaryOperator>(B)->getOpcode() == Instruction::FMul)
+ return nullptr;
+ }
+ }
+ }
+
// We can't push the freeze through an instruction which can itself create
// poison. If the only source of new poison is flags, we can simply
// strip them (since we know the only use is the freeze and nothing can
diff --git a/llvm/test/Transforms/InstCombine/freeze.ll b/llvm/test/Transforms/InstCombine/freeze.ll
index 8875ce1c566f3..1a778aebda29b 100644
--- a/llvm/test/Transforms/InstCombine/freeze.ll
+++ b/llvm/test/Transforms/InstCombine/freeze.ll
@@ -1195,6 +1195,72 @@ define i1 @propagate_drop_flags_icmp(i32 %a, i32 %b) {
declare i32 @llvm.umax.i32(i32 %a, i32 %b)
+define i1 @propagate_drop_fma_mul_add_left(float %arg1, float %arg2) {
+; CHECK-LABEL: @propagate_drop_fma_mul_add_left(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I:%.*]] = fmul contract float [[ARG1:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT: [[I1:%.*]] = fadd contract float [[I]], 1.000000e+00
+; CHECK-NEXT: [[I1_FR:%.*]] = freeze float [[I1]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[I1_FR]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+bb:
+ %i = fmul contract float %arg1, %arg2
+ %i1 = fadd contract float %i, 1.0
+ %cmp = fcmp ogt float %i1, 0.0
+ %fr = freeze i1 %cmp
+ ret i1 %fr
+}
+
+define i1 @propagate_drop_fma_add_mul_right(float %arg1, float %arg2) {
+; CHECK-LABEL: @propagate_drop_fma_add_mul_right(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I:%.*]] = fmul contract float [[ARG1:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT: [[I1:%.*]] = fadd contract float [[I]], 1.000000e+00
+; CHECK-NEXT: [[I1_FR:%.*]] = freeze float [[I1]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[I1_FR]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+bb:
+ %i = fmul contract float %arg1, %arg2
+ %i1 = fadd contract float 1.0, %i
+ %cmp = fcmp ogt float %i1, 0.0
+ %fr = freeze i1 %cmp
+ ret i1 %fr
+}
+
+define i1 @propagate_drop_fma_mul_sub_left(float %arg1, float %arg2) {
+; CHECK-LABEL: @propagate_drop_fma_mul_sub_left(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I:%.*]] = fmul contract float [[ARG1:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT: [[I1:%.*]] = fadd contract float [[I]], -1.000000e+00
+; CHECK-NEXT: [[I1_FR:%.*]] = freeze float [[I1]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[I1_FR]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+bb:
+ %i = fmul contract float %arg1, %arg2
+ %i1 = fsub contract float %i, 1.0
+ %cmp = fcmp ogt float %i1, 0.0
+ %fr = freeze i1 %cmp
+ ret i1 %fr
+}
+
+define float @propagate_drop_fma_sub_mul_right(float %arg1, float %arg2) {
+; CHECK-LABEL: @propagate_drop_fma_sub_mul_right(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[I:%.*]] = fmul contract float [[ARG1:%.*]], [[ARG2:%.*]]
+; CHECK-NEXT: [[I1:%.*]] = fsub contract float 1.000000e+00, [[I]]
+; CHECK-NEXT: [[FR:%.*]] = freeze float [[I1]]
+; CHECK-NEXT: ret float [[FR]]
+;
+bb:
+ %i = fmul contract float %arg1, %arg2
+ %i1 = fsub contract float 1.0, %i
+ %fr = freeze float %i1
+ ret float %fr
+}
+
define i32 @freeze_call_with_range_attr(i32 %a) {
; CHECK-LABEL: @freeze_call_with_range_attr(
; CHECK-NEXT: [[Y:%.*]] = lshr i32 2047, [[A:%.*]]
``````````
</details>
https://github.com/llvm/llvm-project/pull/141934
More information about the llvm-commits mailing list