[llvm] 30f30e1 - [PowerPC] Fix the non-tail call in scalar MASS conversion
Masoud Ataei via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 8 09:01:58 PST 2022
Author: Masoud Ataei
Date: 2022-03-08T08:59:17-08:00
New Revision: 30f30e1c12fa7397338494f7b9938f21fc22961a
URL: https://github.com/llvm/llvm-project/commit/30f30e1c12fa7397338494f7b9938f21fc22961a
DIFF: https://github.com/llvm/llvm-project/commit/30f30e1c12fa7397338494f7b9938f21fc22961a.diff
LOG: [PowerPC] Fix the non-tail call in scalar MASS conversion
This patch proposes a fix for patch https://reviews.llvm.org/D101759
regarding the non-tail-call conversion of math functions to MASS calls.
Differential: https://reviews.llvm.org/D121016
Reviewer: @nemanjai
Added:
llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll
llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 7b44ec8b39b92..1d40a64ddaf0b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17773,6 +17773,7 @@ SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::CallLoweringInfo CLI(DAG);
EVT RetVT = Op.getValueType();
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
SDValue Callee =
DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
@@ -17787,11 +17788,19 @@ SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
Entry.IsZExt = !Entry.IsSExt;
Args.push_back(Entry);
}
+
+ SDValue InChain = DAG.getEntryNode();
+ SDValue TCChain = InChain;
+ const Function &F = DAG.getMachineFunction().getFunction();
+ bool isTailCall =
+ TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
+ (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
+ if (isTailCall)
+ InChain = TCChain;
CLI.setDebugLoc(SDLoc(Op))
- .setChain(DAG.getEntryNode())
- .setLibCallee(CallingConv::C, RetVT.getTypeForEVT(*DAG.getContext()),
- Callee, std::move(Args))
- .setTailCall(true)
+ .setChain(InChain)
+ .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
+ .setTailCall(isTailCall)
.setSExtResult(SignExtend)
.setZExtResult(!SignExtend)
.setIsPostTypeLegalization(true);
diff --git a/llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll b/llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll
new file mode 100644
index 0000000000000..35e71b02ac81a
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -enable-ppc-gen-scalar-mass -O3 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK-LNX %s
+; RUN: llc -enable-ppc-gen-scalar-mass -O3 -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck --check-prefix=CHECK-AIX %s
+
+define void @cos_f64(double* %arg) {
+; CHECK-LNX-LABEL: cos_f64:
+; CHECK-LNX: # %bb.0: # %bb
+; CHECK-LNX-NEXT: mflr 0
+; CHECK-LNX-NEXT: .cfi_def_cfa_offset 48
+; CHECK-LNX-NEXT: .cfi_offset lr, 16
+; CHECK-LNX-NEXT: .cfi_offset f31, -8
+; CHECK-LNX-NEXT: stfd 31, -8(1) # 8-byte Folded Spill
+; CHECK-LNX-NEXT: std 0, 16(1)
+; CHECK-LNX-NEXT: stdu 1, -48(1)
+; CHECK-LNX-NEXT: addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-LNX-NEXT: xssqrtdp 31, 0
+; CHECK-LNX-NEXT: lfs 1, .LCPI0_0@toc@l(3)
+; CHECK-LNX-NEXT: bl __xl_cos
+; CHECK-LNX-NEXT: nop
+; CHECK-LNX-NEXT: xsmuldp 0, 31, 1
+; CHECK-LNX-NEXT: .p2align 4
+; CHECK-LNX-NEXT: .LBB0_1: # %bb2
+; CHECK-LNX-NEXT: #
+; CHECK-LNX-NEXT: stfd 0, 0(3)
+; CHECK-LNX-NEXT: b .LBB0_1
+;
+; CHECK-AIX-LABEL: cos_f64:
+; CHECK-AIX: # %bb.0: # %bb
+; CHECK-AIX-NEXT: mflr 0
+; CHECK-AIX-NEXT: stw 0, 8(1)
+; CHECK-AIX-NEXT: stwu 1, -64(1)
+; CHECK-AIX-NEXT: stfd 31, 56(1) # 8-byte Folded Spill
+; CHECK-AIX-NEXT: bl .sqrt[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: lwz 3, L..C0(2) # %const.0
+; CHECK-AIX-NEXT: fmr 31, 1
+; CHECK-AIX-NEXT: lfs 0, 0(3)
+; CHECK-AIX-NEXT: fmr 1, 0
+; CHECK-AIX-NEXT: bl .__xl_cos[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: fmul 0, 31, 1
+; CHECK-AIX-NEXT: L..BB0_1: # %bb2
+; CHECK-AIX-NEXT: #
+; CHECK-AIX-NEXT: stfd 0, 0(3)
+; CHECK-AIX-NEXT: b L..BB0_1
+bb:
+ %i = bitcast double* %arg to i8*
+ %i1 = getelementptr i8, i8* %i, i64 undef
+ br label %bb2
+
+bb2:
+ %i3 = getelementptr inbounds i8, i8* %i1, i64 undef
+ %i4 = bitcast i8* %i3 to double*
+ store double undef, double* %i4, align 8
+ %i5 = getelementptr inbounds i8, i8* %i1, i64 0
+ %i6 = bitcast i8* %i5 to double*
+ %i7 = tail call afn double @llvm.sqrt.f64(double undef)
+ %i8 = fmul afn double undef, 0x401921FB54442D28
+ %i9 = tail call afn double @llvm.cos.f64(double %i8) #2
+ %i10 = fmul afn double %i7, %i9
+ store double %i10, double* %i6, align 8
+ br label %bb2
+}
+
+define void @log_f64(double* %arg) {
+; CHECK-LNX-LABEL: log_f64:
+; CHECK-LNX: # %bb.0: # %bb
+; CHECK-LNX-NEXT: mflr 0
+; CHECK-LNX-NEXT: .cfi_def_cfa_offset 48
+; CHECK-LNX-NEXT: .cfi_offset lr, 16
+; CHECK-LNX-NEXT: .cfi_offset f31, -8
+; CHECK-LNX-NEXT: stfd 31, -8(1) # 8-byte Folded Spill
+; CHECK-LNX-NEXT: std 0, 16(1)
+; CHECK-LNX-NEXT: stdu 1, -48(1)
+; CHECK-LNX-NEXT: addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-LNX-NEXT: xssqrtdp 31, 0
+; CHECK-LNX-NEXT: lfs 1, .LCPI1_0@toc@l(3)
+; CHECK-LNX-NEXT: bl __xl_log
+; CHECK-LNX-NEXT: nop
+; CHECK-LNX-NEXT: xsmuldp 0, 31, 1
+; CHECK-LNX-NEXT: .p2align 4
+; CHECK-LNX-NEXT: .LBB1_1: # %bb2
+; CHECK-LNX-NEXT: #
+; CHECK-LNX-NEXT: stfd 0, 0(3)
+; CHECK-LNX-NEXT: b .LBB1_1
+;
+; CHECK-AIX-LABEL: log_f64:
+; CHECK-AIX: # %bb.0: # %bb
+; CHECK-AIX-NEXT: mflr 0
+; CHECK-AIX-NEXT: stw 0, 8(1)
+; CHECK-AIX-NEXT: stwu 1, -64(1)
+; CHECK-AIX-NEXT: stfd 31, 56(1) # 8-byte Folded Spill
+; CHECK-AIX-NEXT: bl .sqrt[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: lwz 3, L..C1(2) # %const.0
+; CHECK-AIX-NEXT: fmr 31, 1
+; CHECK-AIX-NEXT: lfs 0, 0(3)
+; CHECK-AIX-NEXT: fmr 1, 0
+; CHECK-AIX-NEXT: bl .__xl_log[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: fmul 0, 31, 1
+; CHECK-AIX-NEXT: L..BB1_1: # %bb2
+; CHECK-AIX-NEXT: #
+; CHECK-AIX-NEXT: stfd 0, 0(3)
+; CHECK-AIX-NEXT: b L..BB1_1
+bb:
+ %i = bitcast double* %arg to i8*
+ %i1 = getelementptr i8, i8* %i, i64 undef
+ br label %bb2
+
+bb2:
+ %i3 = getelementptr inbounds i8, i8* %i1, i64 undef
+ %i4 = bitcast i8* %i3 to double*
+ store double undef, double* %i4, align 8
+ %i5 = getelementptr inbounds i8, i8* %i1, i64 0
+ %i6 = bitcast i8* %i5 to double*
+ %i7 = tail call afn double @llvm.sqrt.f64(double undef)
+ %i8 = fmul afn double undef, 0x401921FB54442D28
+ %i9 = tail call afn double @llvm.log.f64(double %i8) #2
+ %i10 = fmul afn double %i7, %i9
+ store double %i10, double* %i6, align 8
+ br label %bb2
+}
+
+declare double @llvm.sqrt.f64(double)
+declare double @llvm.cos.f64(double)
+declare double @llvm.log.f64(double)
diff --git a/llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll b/llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll
new file mode 100644
index 0000000000000..d89099030c638
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -enable-ppc-gen-scalar-mass -O3 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK-LNX %s
+; RUN: llc -enable-ppc-gen-scalar-mass -O3 -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck --check-prefix=CHECK-AIX %s
+
+define void @cos_f64(double* %arg) {
+; CHECK-LNX-LABEL: cos_f64:
+; CHECK-LNX: # %bb.0: # %bb
+; CHECK-LNX-NEXT: mflr 0
+; CHECK-LNX-NEXT: std 0, 16(1)
+; CHECK-LNX-NEXT: stdu 1, -32(1)
+; CHECK-LNX-NEXT: .cfi_def_cfa_offset 32
+; CHECK-LNX-NEXT: .cfi_offset lr, 16
+; CHECK-LNX-NEXT: addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-LNX-NEXT: lfs 1, .LCPI0_0@toc@l(3)
+; CHECK-LNX-NEXT: bl __xl_cos_finite
+; CHECK-LNX-NEXT: nop
+; CHECK-LNX-NEXT: xssqrtdp 0, 0
+; CHECK-LNX-NEXT: xsmuldp 0, 0, 1
+; CHECK-LNX-NEXT: .p2align 4
+; CHECK-LNX-NEXT: .LBB0_1: # %bb2
+; CHECK-LNX-NEXT: #
+; CHECK-LNX-NEXT: stfd 0, 0(3)
+; CHECK-LNX-NEXT: b .LBB0_1
+;
+; CHECK-AIX-LABEL: cos_f64:
+; CHECK-AIX: # %bb.0: # %bb
+; CHECK-AIX-NEXT: mflr 0
+; CHECK-AIX-NEXT: stw 0, 8(1)
+; CHECK-AIX-NEXT: stwu 1, -64(1)
+; CHECK-AIX-NEXT: stfd 31, 56(1) # 8-byte Folded Spill
+; CHECK-AIX-NEXT: bl .sqrt[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: lwz 3, L..C0(2) # %const.0
+; CHECK-AIX-NEXT: fmr 31, 1
+; CHECK-AIX-NEXT: lfs 0, 0(3)
+; CHECK-AIX-NEXT: fmr 1, 0
+; CHECK-AIX-NEXT: bl .__xl_cos_finite[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: fmul 0, 31, 1
+; CHECK-AIX-NEXT: L..BB0_1: # %bb2
+; CHECK-AIX-NEXT: #
+; CHECK-AIX-NEXT: stfd 0, 0(3)
+; CHECK-AIX-NEXT: b L..BB0_1
+bb:
+ %i = bitcast double* %arg to i8*
+ %i1 = getelementptr i8, i8* %i, i64 undef
+ br label %bb2
+
+bb2:
+ %i3 = getelementptr inbounds i8, i8* %i1, i64 undef
+ %i4 = bitcast i8* %i3 to double*
+ store double undef, double* %i4, align 8
+ %i5 = getelementptr inbounds i8, i8* %i1, i64 0
+ %i6 = bitcast i8* %i5 to double*
+ %i7 = tail call fast double @llvm.sqrt.f64(double undef)
+ %i8 = fmul fast double undef, 0x401921FB54442D28
+ %i9 = tail call fast double @llvm.cos.f64(double %i8) #2
+ %i10 = fmul fast double %i7, %i9
+ store double %i10, double* %i6, align 8
+ br label %bb2
+}
+
+define void @log_f64(double* %arg) {
+; CHECK-LNX-LABEL: log_f64:
+; CHECK-LNX: # %bb.0: # %bb
+; CHECK-LNX-NEXT: mflr 0
+; CHECK-LNX-NEXT: std 0, 16(1)
+; CHECK-LNX-NEXT: stdu 1, -32(1)
+; CHECK-LNX-NEXT: .cfi_def_cfa_offset 32
+; CHECK-LNX-NEXT: .cfi_offset lr, 16
+; CHECK-LNX-NEXT: addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-LNX-NEXT: lfs 1, .LCPI1_0@toc@l(3)
+; CHECK-LNX-NEXT: bl __xl_log_finite
+; CHECK-LNX-NEXT: nop
+; CHECK-LNX-NEXT: xssqrtdp 0, 0
+; CHECK-LNX-NEXT: xsmuldp 0, 0, 1
+; CHECK-LNX-NEXT: .p2align 4
+; CHECK-LNX-NEXT: .LBB1_1: # %bb2
+; CHECK-LNX-NEXT: #
+; CHECK-LNX-NEXT: stfd 0, 0(3)
+; CHECK-LNX-NEXT: b .LBB1_1
+;
+; CHECK-AIX-LABEL: log_f64:
+; CHECK-AIX: # %bb.0: # %bb
+; CHECK-AIX-NEXT: mflr 0
+; CHECK-AIX-NEXT: stw 0, 8(1)
+; CHECK-AIX-NEXT: stwu 1, -64(1)
+; CHECK-AIX-NEXT: stfd 31, 56(1) # 8-byte Folded Spill
+; CHECK-AIX-NEXT: bl .sqrt[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: lwz 3, L..C1(2) # %const.0
+; CHECK-AIX-NEXT: fmr 31, 1
+; CHECK-AIX-NEXT: lfs 0, 0(3)
+; CHECK-AIX-NEXT: fmr 1, 0
+; CHECK-AIX-NEXT: bl .__xl_log_finite[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: fmul 0, 31, 1
+; CHECK-AIX-NEXT: L..BB1_1: # %bb2
+; CHECK-AIX-NEXT: #
+; CHECK-AIX-NEXT: stfd 0, 0(3)
+; CHECK-AIX-NEXT: b L..BB1_1
+bb:
+ %i = bitcast double* %arg to i8*
+ %i1 = getelementptr i8, i8* %i, i64 undef
+ br label %bb2
+
+bb2:
+ %i3 = getelementptr inbounds i8, i8* %i1, i64 undef
+ %i4 = bitcast i8* %i3 to double*
+ store double undef, double* %i4, align 8
+ %i5 = getelementptr inbounds i8, i8* %i1, i64 0
+ %i6 = bitcast i8* %i5 to double*
+ %i7 = tail call fast double @llvm.sqrt.f64(double undef)
+ %i8 = fmul fast double undef, 0x401921FB54442D28
+ %i9 = tail call fast double @llvm.log.f64(double %i8) #2
+ %i10 = fmul fast double %i7, %i9
+ store double %i10, double* %i6, align 8
+ br label %bb2
+}
+
+declare double @llvm.sqrt.f64(double)
+declare double @llvm.cos.f64(double)
+declare double @llvm.log.f64(double)
More information about the llvm-commits
mailing list