[llvm] LICM: hoist BO assoc for FAdd and FMul (PR #108415)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 12 09:24:09 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

<details>
<summary>Changes</summary>

Extend hoistBOAssociation to the FAdd and FMul cases, noting that we copy an intersection of the fast-math flags present in both instructions.

---
Full diff: https://github.com/llvm/llvm-project/pull/108415.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/LICM.cpp (+15-2) 
- (modified) llvm/test/Transforms/LICM/hoist-binop.ll (+227-7) 


``````````diff
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 23e9c70b62642a..4b1650b93cc1d1 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2819,10 +2819,17 @@ static bool hoistBOAssociation(Instruction &I, Loop &L,
   if (!BO || !BO->isAssociative())
     return false;
 
-  // TODO: Only hoist ADDs and MULs for now.
+  // TODO: Only hoist ADDs, MULs, FADDs, and FMULs for now.
   Instruction::BinaryOps Opcode = BO->getOpcode();
-  if (Opcode != Instruction::Add && Opcode != Instruction::Mul)
+  switch (Opcode) {
+  case Instruction::Add:
+  case Instruction::Mul:
+  case Instruction::FAdd:
+  case Instruction::FMul:
+    break;
+  default:
     return false;
+  }
 
   bool LVInRHS = L.isLoopInvariant(BO->getOperand(0));
   auto *BO0 = dyn_cast<BinaryOperator>(BO->getOperand(LVInRHS));
@@ -2857,6 +2864,12 @@ static bool hoistBOAssociation(Instruction &I, Loop &L,
     if (auto *I = dyn_cast<Instruction>(Inv))
       I->setHasNoUnsignedWrap(true);
     NewBO->setHasNoUnsignedWrap(true);
+  } else if (Opcode == Instruction::FAdd || Opcode == Instruction::FMul) {
+    // Intersect FMF flags for FADD and FMUL.
+    FastMathFlags Intersect = BO->getFastMathFlags() & BO0->getFastMathFlags();
+    if (auto *I = dyn_cast<Instruction>(Inv))
+      I->setFastMathFlags(Intersect);
+    NewBO->setFastMathFlags(Intersect);
   }
 
   BO->replaceAllUsesWith(NewBO);
diff --git a/llvm/test/Transforms/LICM/hoist-binop.ll b/llvm/test/Transforms/LICM/hoist-binop.ll
index a840e247578840..74e2b7a2caf4a5 100644
--- a/llvm/test/Transforms/LICM/hoist-binop.ll
+++ b/llvm/test/Transforms/LICM/hoist-binop.ll
@@ -437,17 +437,17 @@ loop:
   br label %loop
 }
 
-; Don't hoist floating-point ops, even if they are associative. This would be
-; valid, but is currently disabled.
-define void @fadd(float %c1, float %c2) {
-; CHECK-LABEL: @fadd(
+; The simple case. Hoist if fast is present on both instructions.
+define void @fadd_fast(float %c1, float %c2) {
+; CHECK-LABEL: @fadd_fast(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = fadd fast float [[C1:%.*]], [[C2:%.*]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = fadd fast float [[INDEX]], [[C1:%.*]]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = fadd fast float [[INDEX]], [[C1]]
 ; CHECK-NEXT:    call void @use(float [[STEP_ADD]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = fadd fast float [[STEP_ADD]], [[C2:%.*]]
+; CHECK-NEXT:    [[INDEX_NEXT_REASS]] = fadd fast float [[INDEX]], [[INVARIANT_OP]]
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
 entry:
@@ -461,6 +461,226 @@ loop:
   br label %loop
 }
 
+; The simple case. Hoist if fast is present on both instructions.
+define void @fmul_fast(float %c1, float %c2) {
+; CHECK-LABEL: @fmul_fast(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = fmul fast float [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = fmul fast float [[INDEX]], [[C1]]
+; CHECK-NEXT:    call void @use(float [[STEP_ADD]])
+; CHECK-NEXT:    [[INDEX_NEXT_REASS]] = fmul fast float [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+  %step.add = fmul fast float %index, %c1
+  call void @use(float %step.add)
+  %index.next = fmul fast float %step.add, %c2
+  br label %loop
+}
+
+; The minimum case.
+; Hoist if reasassoc and nsz are present on both instructions.
+define void @fadd_reassoc_nsz(float %c1, float %c2) {
+; CHECK-LABEL: @fadd_reassoc_nsz(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = fadd reassoc nsz float [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = fadd reassoc nsz float [[INDEX]], [[C1]]
+; CHECK-NEXT:    call void @use(float [[STEP_ADD]])
+; CHECK-NEXT:    [[INDEX_NEXT_REASS]] = fadd reassoc nsz float [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+  %step.add = fadd reassoc nsz float %index, %c1
+  call void @use(float %step.add)
+  %index.next = fadd reassoc nsz float %step.add, %c2
+  br label %loop
+}
+
+; The minimum case.
+; Hoist if reasassoc and nsz are present on both instructions.
+define void @fmul_reassoc_nsz(float %c1, float %c2) {
+; CHECK-LABEL: @fmul_reassoc_nsz(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = fmul reassoc nsz float [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1]]
+; CHECK-NEXT:    call void @use(float [[STEP_ADD]])
+; CHECK-NEXT:    [[INDEX_NEXT_REASS]] = fmul reassoc nsz float [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+  %step.add = fmul reassoc nsz float %index, %c1
+  call void @use(float %step.add)
+  %index.next = fmul reassoc nsz float %step.add, %c2
+  br label %loop
+}
+
+; Don't hoist if both reassoc and nsz aren't present on both instructions.
+define void @fadd_nonassoc(float %c1, float %c2) {
+; CHECK-LABEL: @fadd_nonassoc(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = fadd reassoc float [[INDEX]], [[C1:%.*]]
+; CHECK-NEXT:    call void @use(float [[STEP_ADD]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = fadd reassoc nsz float [[STEP_ADD]], [[C2:%.*]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+  %step.add = fadd reassoc float %index, %c1
+  call void @use(float %step.add)
+  %index.next = fadd reassoc nsz float %step.add, %c2
+  br label %loop
+}
+
+; Don't hoist if both reassoc and nsz aren't present on both instructions.
+define void @fmul_noassoc(float %c1, float %c2) {
+; CHECK-LABEL: @fmul_noassoc(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = fmul reassoc nsz float [[INDEX]], [[C1:%.*]]
+; CHECK-NEXT:    call void @use(float [[STEP_ADD]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = fmul nsz float [[STEP_ADD]], [[C2:%.*]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+  %step.add = fmul reassoc nsz float %index, %c1
+  call void @use(float %step.add)
+  %index.next = fmul nsz float %step.add, %c2
+  br label %loop
+}
+
+; No intersection in flags present on both instructions,
+; except reassoc and nsz.
+define void @fadd_fmf_nointersect(float %c1, float %c2) {
+; CHECK-LABEL: @fadd_fmf_nointersect(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = fadd reassoc nsz float [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = fadd reassoc nnan nsz float [[INDEX]], [[C1]]
+; CHECK-NEXT:    call void @use(float [[STEP_ADD]])
+; CHECK-NEXT:    [[INDEX_NEXT_REASS]] = fadd reassoc nsz float [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+  %step.add = fadd reassoc nsz nnan float %index, %c1
+  call void @use(float %step.add)
+  %index.next = fadd reassoc nsz ninf float %step.add, %c2
+  br label %loop
+}
+
+; No intersection in flags present on both instructions,
+; except reassoc and nsz.
+define void @fmul_fmf_nointersect(float %c1, float %c2) {
+; CHECK-LABEL: @fmul_fmf_nointersect(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = fmul reassoc nsz float [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = fmul reassoc nsz contract float [[INDEX]], [[C1]]
+; CHECK-NEXT:    call void @use(float [[STEP_ADD]])
+; CHECK-NEXT:    [[INDEX_NEXT_REASS]] = fmul reassoc nsz float [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+  %step.add = fmul reassoc nsz contract float %index, %c1
+  call void @use(float %step.add)
+  %index.next = fmul reassoc nnan nsz float %step.add, %c2
+  br label %loop
+}
+
+; Non-empty intersection in flags present on both instructions,
+; including reassoc and nsz.
+define void @fadd_fmf_intersect(float %c1, float %c2) {
+; CHECK-LABEL: @fadd_fmf_intersect(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = fadd reassoc ninf nsz float [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = fadd reassoc nnan ninf nsz float [[INDEX]], [[C1]]
+; CHECK-NEXT:    call void @use(float [[STEP_ADD]])
+; CHECK-NEXT:    [[INDEX_NEXT_REASS]] = fadd reassoc ninf nsz float [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+  %step.add = fadd reassoc nnan nsz ninf float %index, %c1
+  call void @use(float %step.add)
+  %index.next = fadd reassoc ninf nsz float %step.add, %c2
+  br label %loop
+}
+
+; Non-empty intersection in flags present on both instructions,
+; including reassoc and nsz.
+define void @fmul_fmf_intersect(float %c1, float %c2) {
+; CHECK-LABEL: @fmul_fmf_intersect(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[INVARIANT_OP:%.*]] = fmul reassoc nsz afn float [[C1:%.*]], [[C2:%.*]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[INDEX_NEXT_REASS:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = fmul reassoc nsz arcp afn float [[INDEX]], [[C1]]
+; CHECK-NEXT:    call void @use(float [[STEP_ADD]])
+; CHECK-NEXT:    [[INDEX_NEXT_REASS]] = fmul reassoc nsz afn float [[INDEX]], [[INVARIANT_OP]]
+; CHECK-NEXT:    br label [[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %index = phi float [ 0., %entry ], [ %index.next, %loop ]
+  %step.add = fmul reassoc afn nsz arcp float %index, %c1
+  call void @use(float %step.add)
+  %index.next = fmul reassoc nsz afn float %step.add, %c2
+  br label %loop
+}
+
 ; Don't hoist if the intermediate op has more than two uses. This is an
 ; heuristic that can be adjusted if warranted. Currently we are being
 ; conservative to minimise potential impact in code size.

``````````

</details>


https://github.com/llvm/llvm-project/pull/108415


More information about the llvm-commits mailing list