[PATCH] D78668: PowerPC: Treat llvm.fma.f* intrinsic as using CTR with SPE

Wed Apr 22 14:09:41 PDT 2020

jhibbits created this revision.
jhibbits added a reviewer: PowerPC.
Herald added subscribers: llvm-commits, shchenz, kbarton, hiraditya, nemanjai.
Herald added a project: LLVM.

The SPE doesn't have a 'fma' instruction, so the intrinsic becomes a
libcall.  It really should become an expansion to two instructions, but
for some reason the compiler doesn't think that's as optimal as a
branch.  Since this lowering is done after CTR is allocated for loops,
tell the optimizer that CTR may be used in this case.  This prevents a
"Invalid PPC CTR loop!" assertion in the case that a fma() function call
is used in a C/C++ file, and clang converts it into an intrinsic.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D78668

Files:
  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
  llvm/test/CodeGen/PowerPC/spe.ll


Index: llvm/test/CodeGen/PowerPC/spe.ll
===================================================================

--- llvm/test/CodeGen/PowerPC/spe.ll
+++ llvm/test/CodeGen/PowerPC/spe.ll
@@ -1402,7 +1402,6 @@
 ; CHECK-NEXT:    addi 1, 1, 48
 ; CHECK-NEXT:    mtlr 0
 ; CHECK-NEXT:    blr
-entry:
   %b = getelementptr %struct.a, %struct.a* %f, i32 0, i32 0
   %1 = load float, float* undef
   %conv = fpext float %1 to double
@@ -1416,3 +1415,70 @@
   store float %l, float* undef
   ret void
 }
+
+define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: test_fma:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    stw 0, 4(1)
+; CHECK-NEXT:    stwu 1, -48(1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 4
+; CHECK-NEXT:    .cfi_offset r29, -12
+; CHECK-NEXT:    .cfi_offset r30, -8
+; CHECK-NEXT:    .cfi_offset r29, -40
+; CHECK-NEXT:    .cfi_offset r30, -32
+; CHECK-NEXT:    cmpwi 3, 1
+; CHECK-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
+; CHECK-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
+; CHECK-NEXT:    evstdd 29, 8(1) # 8-byte Folded Spill
+; CHECK-NEXT:    evstdd 30, 16(1) # 8-byte Folded Spill
+; CHECK-NEXT:    blt 0, .LBB58_3
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    mr 30, 3
+; CHECK-NEXT:    li 29, 0
+; CHECK-NEXT:    # implicit-def: $r5
+; CHECK-NEXT:  .LBB58_2: # %for.body
+; CHECK-NEXT:    #
+; CHECK-NEXT:    efscfsi 3, 29
+; CHECK-NEXT:    mr 4, 3
+; CHECK-NEXT:    bl fmaf
+; CHECK-NEXT:    addi 29, 29, 1
+; CHECK-NEXT:    cmplw 30, 29
+; CHECK-NEXT:    mr 5, 3
+; CHECK-NEXT:    bne 0, .LBB58_2
+; CHECK-NEXT:    b .LBB58_4
+; CHECK-NEXT:  .LBB58_3:
+; CHECK-NEXT:    # implicit-def: $r5
+; CHECK-NEXT:  .LBB58_4: # %for.cond.cleanup
+; CHECK-NEXT:    evldd 30, 16(1) # 8-byte Folded Reload
+; CHECK-NEXT:    evldd 29, 8(1) # 8-byte Folded Reload
+; CHECK-NEXT:    mr 3, 5
+; CHECK-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
+; CHECK-NEXT:    lwz 29, 36(1) # 4-byte Folded Reload
+; CHECK-NEXT:    lwz 0, 52(1)
+; CHECK-NEXT:    addi 1, 1, 48
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+entry:
+  %cmp8 = icmp sgt i32 %d, 0
+  br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  %e.0.lcssa = phi float [ undef, %entry ], [ %0, %for.body ]
+  ret float %e.0.lcssa
+
+for.body:                                         ; preds = %for.body, %entry
+  %f.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %e.09 = phi float [ %0, %for.body ], [ undef, %entry ]
+  %conv = sitofp i32 %f.010 to float
+  %0 = tail call float @llvm.fma.f32(float %conv, float %conv, float %e.09)
+  %inc = add nuw nsw i32 %f.010, 1
+  %exitcond = icmp eq i32 %inc, %d
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare float @llvm.fma.f32(float, float, float) #1
+
+attributes #1 = { nounwind readnone speculatable willreturn }
Index: llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -315,6 +315,11 @@
               return true;
             else
               continue; // ISD::FCOPYSIGN is never a library call.
+          case Intrinsic::fma:
+            if (ST->hasSPE())
+              return true; // ISD::FMA might be a libcall on SPE
+            else
+              continue;
           case Intrinsic::sqrt:               Opcode = ISD::FSQRT;      break;
           case Intrinsic::floor:              Opcode = ISD::FFLOOR;     break;
           case Intrinsic::ceil:               Opcode = ISD::FCEIL;      break;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D78668.259386.patch
Type: text/x-patch
Size: 3798 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200422/92689690/attachment.bin>