[llvm-branch-commits] [llvm] 09ec80e - [PowerPC] Treat llvm.fmuladd intrinsic as using CTR
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jun 2 13:56:33 PDT 2022
Author: Qiu Chaofan
Date: 2022-06-02T13:55:14-07:00
New Revision: 09ec80e16f475bd1f719bf82aade8cc8b4974187
URL: https://github.com/llvm/llvm-project/commit/09ec80e16f475bd1f719bf82aade8cc8b4974187
DIFF: https://github.com/llvm/llvm-project/commit/09ec80e16f475bd1f719bf82aade8cc8b4974187.diff
LOG: [PowerPC] Treat llvm.fmuladd intrinsic as using CTR
This fixes bug 55463, similar to D78668. This is a temporary fix since
we will switch to post-isel CTR loop determination in the future.
Reviewed By: dim, shchenz
Differential Revision: https://reviews.llvm.org/D125746
(cherry picked from commit d9d15af7873fe16d7a0dde4def30f40fa9901777)
Added:
llvm/test/CodeGen/PowerPC/pr55463.ll
Modified:
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index cc5738a5d7b63..48be7e0860df6 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -491,15 +491,13 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::experimental_constrained_sin:
case Intrinsic::experimental_constrained_cos:
return true;
- // There is no corresponding FMA instruction for PPC double double.
- // Thus, we need to disable CTR loop generation for this type.
- case Intrinsic::fmuladd:
case Intrinsic::copysign:
if (CI->getArgOperand(0)->getType()->getScalarType()->
isPPC_FP128Ty())
return true;
else
continue; // ISD::FCOPYSIGN is never a library call.
+ case Intrinsic::fmuladd:
case Intrinsic::fma: Opcode = ISD::FMA; break;
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
diff --git a/llvm/test/CodeGen/PowerPC/pr55463.ll b/llvm/test/CodeGen/PowerPC/pr55463.ll
new file mode 100644
index 0000000000000..63d4170da2d54
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr55463.ll
@@ -0,0 +1,136 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpcspe -verify-machineinstrs < %s | FileCheck %s
+
+define void @baz() #0 {
+; CHECK-LABEL: baz:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: stw 0, 4(1)
+; CHECK-NEXT: stwu 1, -48(1)
+; CHECK-NEXT: li 3, .LCPI0_0@l
+; CHECK-NEXT: li 5, .LCPI0_1@l
+; CHECK-NEXT: lis 4, .LCPI0_0@ha
+; CHECK-NEXT: lis 6, .LCPI0_1@ha
+; CHECK-NEXT: evstdd 29, 24(1) # 8-byte Folded Spill
+; CHECK-NEXT: evstdd 30, 32(1) # 8-byte Folded Spill
+; CHECK-NEXT: evlddx 30, 4, 3
+; CHECK-NEXT: # implicit-def: $r3
+; CHECK-NEXT: evlddx 29, 6, 5
+; CHECK-NEXT: evstdd 28, 16(1) # 8-byte Folded Spill
+; CHECK-NEXT: # implicit-def: $r28
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: # %bb1
+; CHECK-NEXT: #
+; CHECK-NEXT: efdcfsi 8, 3
+; CHECK-NEXT: mr 4, 30
+; CHECK-NEXT: mr 6, 29
+; CHECK-NEXT: evmergehi 3, 30, 30
+; CHECK-NEXT: evmergehi 5, 29, 29
+; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3
+; CHECK-NEXT: # kill: def $r5 killed $r5 killed $s5
+; CHECK-NEXT: evmergehi 7, 8, 8
+; CHECK-NEXT: # kill: def $r8 killed $r8 killed $s8
+; CHECK-NEXT: # kill: def $r7 killed $r7 killed $s7
+; CHECK-NEXT: bl fma
+; CHECK-NEXT: evmergelo 3, 3, 4
+; CHECK-NEXT: addi 5, 28, 1
+; CHECK-NEXT: cmplw 5, 28
+; CHECK-NEXT: mr 28, 5
+; CHECK-NEXT: efdctsiz 3, 3
+; CHECK-NEXT: bge 0, .LBB0_1
+; CHECK-NEXT: # %bb.2: # %bb8
+; CHECK-NEXT: bl wibble
+; CHECK-NEXT: evldd 30, 32(1) # 8-byte Folded Reload
+; CHECK-NEXT: evldd 29, 24(1) # 8-byte Folded Reload
+; CHECK-NEXT: evldd 28, 16(1) # 8-byte Folded Reload
+; CHECK-NEXT: lwz 0, 52(1)
+; CHECK-NEXT: addi 1, 1, 48
+; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: blr
+bb:
+ br label %bb1
+
+bb1:
+ %tmp = phi i32 [ %tmp6, %bb1 ], [ undef, %bb ]
+ %tmp2 = phi i32 [ %tmp3, %bb1 ], [ undef, %bb ]
+ %tmp3 = add nsw i32 %tmp2, 1
+ %tmp4 = sitofp i32 %tmp to double
+ %tmp5 = tail call double @llvm.fmuladd.f64(double 0.000000e+00, double -0.000000e+00, double %tmp4)
+ %tmp6 = fptosi double %tmp5 to i32
+ %tmp7 = icmp eq i32 %tmp2, 0
+ br i1 %tmp7, label %bb8, label %bb1
+
+bb8:
+ call void @wibble(i32 %tmp6)
+ ret void
+}
+
+define void @wombat() #0 {
+; CHECK-LABEL: wombat:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: stw 0, 4(1)
+; CHECK-NEXT: stwu 1, -48(1)
+; CHECK-NEXT: li 3, .LCPI1_0@l
+; CHECK-NEXT: li 5, .LCPI1_1@l
+; CHECK-NEXT: lis 4, .LCPI1_0@ha
+; CHECK-NEXT: lis 6, .LCPI1_1@ha
+; CHECK-NEXT: evstdd 29, 24(1) # 8-byte Folded Spill
+; CHECK-NEXT: evstdd 30, 32(1) # 8-byte Folded Spill
+; CHECK-NEXT: evlddx 30, 4, 3
+; CHECK-NEXT: # implicit-def: $r3
+; CHECK-NEXT: evlddx 29, 6, 5
+; CHECK-NEXT: evstdd 28, 16(1) # 8-byte Folded Spill
+; CHECK-NEXT: # implicit-def: $r28
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB1_1: # %bb1
+; CHECK-NEXT: #
+; CHECK-NEXT: efdcfsi 8, 3
+; CHECK-NEXT: mr 4, 30
+; CHECK-NEXT: mr 6, 29
+; CHECK-NEXT: evmergehi 3, 30, 30
+; CHECK-NEXT: evmergehi 5, 29, 29
+; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3
+; CHECK-NEXT: # kill: def $r5 killed $r5 killed $s5
+; CHECK-NEXT: evmergehi 7, 8, 8
+; CHECK-NEXT: # kill: def $r8 killed $r8 killed $s8
+; CHECK-NEXT: # kill: def $r7 killed $r7 killed $s7
+; CHECK-NEXT: bl fma
+; CHECK-NEXT: evmergelo 3, 3, 4
+; CHECK-NEXT: addi 5, 28, 1
+; CHECK-NEXT: cmplw 5, 28
+; CHECK-NEXT: mr 28, 5
+; CHECK-NEXT: efdctsiz 3, 3
+; CHECK-NEXT: bge 0, .LBB1_1
+; CHECK-NEXT: # %bb.2: # %bb8
+; CHECK-NEXT: bl wibble
+; CHECK-NEXT: evldd 30, 32(1) # 8-byte Folded Reload
+; CHECK-NEXT: evldd 29, 24(1) # 8-byte Folded Reload
+; CHECK-NEXT: evldd 28, 16(1) # 8-byte Folded Reload
+; CHECK-NEXT: lwz 0, 52(1)
+; CHECK-NEXT: addi 1, 1, 48
+; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: blr
+bb:
+ br label %bb1
+
+bb1:
+ %tmp = phi i32 [ %tmp6, %bb1 ], [ undef, %bb ]
+ %tmp2 = phi i32 [ %tmp3, %bb1 ], [ undef, %bb ]
+ %tmp3 = add nsw i32 %tmp2, 1
+ %tmp4 = sitofp i32 %tmp to double
+ %tmp5 = tail call double @llvm.fma.f64(double 0.000000e+00, double -0.000000e+00, double %tmp4)
+ %tmp6 = fptosi double %tmp5 to i32
+ %tmp7 = icmp eq i32 %tmp2, 0
+ br i1 %tmp7, label %bb8, label %bb1
+
+bb8:
+ call void @wibble(i32 %tmp6)
+ ret void
+}
+
+declare void @wibble(i32)
+declare double @llvm.fmuladd.f64(double, double, double)
+declare double @llvm.fma.f64(double, double, double)
+
+attributes #0 = { nounwind }
More information about the llvm-branch-commits
mailing list