[llvm] 30f30e1 - [PowerPC] Fix the non-tail call in scalar MASS conversion
Masoud Ataei via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 8 09:01:58 PST 2022
Author: Masoud Ataei
Date: 2022-03-08T08:59:17-08:00
New Revision: 30f30e1c12fa7397338494f7b9938f21fc22961a
URL: https://github.com/llvm/llvm-project/commit/30f30e1c12fa7397338494f7b9938f21fc22961a
DIFF: https://github.com/llvm/llvm-project/commit/30f30e1c12fa7397338494f7b9938f21fc22961a.diff
LOG: [PowerPC] Fix the non-tail call in scalar MASS conversion
This patch proposes a fix for patch https://reviews.llvm.org/D101759
regarding the non-tail-call conversion of math functions to MASS calls.
Differential: https://reviews.llvm.org/D121016
Reviewer: @nemanjai
Added:
llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll
llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 7b44ec8b39b92..1d40a64ddaf0b 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17773,6 +17773,7 @@ SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::CallLoweringInfo CLI(DAG);
EVT RetVT = Op.getValueType();
+ Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
SDValue Callee =
DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
@@ -17787,11 +17788,19 @@ SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
Entry.IsZExt = !Entry.IsSExt;
Args.push_back(Entry);
}
+
+ SDValue InChain = DAG.getEntryNode();
+ SDValue TCChain = InChain;
+ const Function &F = DAG.getMachineFunction().getFunction();
+ bool isTailCall =
+ TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
+ (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
+ if (isTailCall)
+ InChain = TCChain;
CLI.setDebugLoc(SDLoc(Op))
- .setChain(DAG.getEntryNode())
- .setLibCallee(CallingConv::C, RetVT.getTypeForEVT(*DAG.getContext()),
- Callee, std::move(Args))
- .setTailCall(true)
+ .setChain(InChain)
+ .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
+ .setTailCall(isTailCall)
.setSExtResult(SignExtend)
.setZExtResult(!SignExtend)
.setIsPostTypeLegalization(true);
diff --git a/llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll b/llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll
new file mode 100644
index 0000000000000..35e71b02ac81a
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/lower-intrinsics-afn-mass_notail.ll
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -enable-ppc-gen-scalar-mass -O3 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK-LNX %s
+; RUN: llc -enable-ppc-gen-scalar-mass -O3 -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck --check-prefix=CHECK-AIX %s
+
+define void @cos_f64(double* %arg) {
+; CHECK-LNX-LABEL: cos_f64:
+; CHECK-LNX: # %bb.0: # %bb
+; CHECK-LNX-NEXT: mflr 0
+; CHECK-LNX-NEXT: .cfi_def_cfa_offset 48
+; CHECK-LNX-NEXT: .cfi_offset lr, 16
+; CHECK-LNX-NEXT: .cfi_offset f31, -8
+; CHECK-LNX-NEXT: stfd 31, -8(1) # 8-byte Folded Spill
+; CHECK-LNX-NEXT: std 0, 16(1)
+; CHECK-LNX-NEXT: stdu 1, -48(1)
+; CHECK-LNX-NEXT: addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-LNX-NEXT: xssqrtdp 31, 0
+; CHECK-LNX-NEXT: lfs 1, .LCPI0_0@toc@l(3)
+; CHECK-LNX-NEXT: bl __xl_cos
+; CHECK-LNX-NEXT: nop
+; CHECK-LNX-NEXT: xsmuldp 0, 31, 1
+; CHECK-LNX-NEXT: .p2align 4
+; CHECK-LNX-NEXT: .LBB0_1: # %bb2
+; CHECK-LNX-NEXT: #
+; CHECK-LNX-NEXT: stfd 0, 0(3)
+; CHECK-LNX-NEXT: b .LBB0_1
+;
+; CHECK-AIX-LABEL: cos_f64:
+; CHECK-AIX: # %bb.0: # %bb
+; CHECK-AIX-NEXT: mflr 0
+; CHECK-AIX-NEXT: stw 0, 8(1)
+; CHECK-AIX-NEXT: stwu 1, -64(1)
+; CHECK-AIX-NEXT: stfd 31, 56(1) # 8-byte Folded Spill
+; CHECK-AIX-NEXT: bl .sqrt[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: lwz 3, L..C0(2) # %const.0
+; CHECK-AIX-NEXT: fmr 31, 1
+; CHECK-AIX-NEXT: lfs 0, 0(3)
+; CHECK-AIX-NEXT: fmr 1, 0
+; CHECK-AIX-NEXT: bl .__xl_cos[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: fmul 0, 31, 1
+; CHECK-AIX-NEXT: L..BB0_1: # %bb2
+; CHECK-AIX-NEXT: #
+; CHECK-AIX-NEXT: stfd 0, 0(3)
+; CHECK-AIX-NEXT: b L..BB0_1
+bb:
+ %i = bitcast double* %arg to i8*
+ %i1 = getelementptr i8, i8* %i, i64 undef
+ br label %bb2
+
+bb2:
+ %i3 = getelementptr inbounds i8, i8* %i1, i64 undef
+ %i4 = bitcast i8* %i3 to double*
+ store double undef, double* %i4, align 8
+ %i5 = getelementptr inbounds i8, i8* %i1, i64 0
+ %i6 = bitcast i8* %i5 to double*
+ %i7 = tail call afn double @llvm.sqrt.f64(double undef)
+ %i8 = fmul afn double undef, 0x401921FB54442D28
+ %i9 = tail call afn double @llvm.cos.f64(double %i8) #2
+ %i10 = fmul afn double %i7, %i9
+ store double %i10, double* %i6, align 8
+ br label %bb2
+}
+
+define void @log_f64(double* %arg) {
+; CHECK-LNX-LABEL: log_f64:
+; CHECK-LNX: # %bb.0: # %bb
+; CHECK-LNX-NEXT: mflr 0
+; CHECK-LNX-NEXT: .cfi_def_cfa_offset 48
+; CHECK-LNX-NEXT: .cfi_offset lr, 16
+; CHECK-LNX-NEXT: .cfi_offset f31, -8
+; CHECK-LNX-NEXT: stfd 31, -8(1) # 8-byte Folded Spill
+; CHECK-LNX-NEXT: std 0, 16(1)
+; CHECK-LNX-NEXT: stdu 1, -48(1)
+; CHECK-LNX-NEXT: addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-LNX-NEXT: xssqrtdp 31, 0
+; CHECK-LNX-NEXT: lfs 1, .LCPI1_0@toc@l(3)
+; CHECK-LNX-NEXT: bl __xl_log
+; CHECK-LNX-NEXT: nop
+; CHECK-LNX-NEXT: xsmuldp 0, 31, 1
+; CHECK-LNX-NEXT: .p2align 4
+; CHECK-LNX-NEXT: .LBB1_1: # %bb2
+; CHECK-LNX-NEXT: #
+; CHECK-LNX-NEXT: stfd 0, 0(3)
+; CHECK-LNX-NEXT: b .LBB1_1
+;
+; CHECK-AIX-LABEL: log_f64:
+; CHECK-AIX: # %bb.0: # %bb
+; CHECK-AIX-NEXT: mflr 0
+; CHECK-AIX-NEXT: stw 0, 8(1)
+; CHECK-AIX-NEXT: stwu 1, -64(1)
+; CHECK-AIX-NEXT: stfd 31, 56(1) # 8-byte Folded Spill
+; CHECK-AIX-NEXT: bl .sqrt[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: lwz 3, L..C1(2) # %const.0
+; CHECK-AIX-NEXT: fmr 31, 1
+; CHECK-AIX-NEXT: lfs 0, 0(3)
+; CHECK-AIX-NEXT: fmr 1, 0
+; CHECK-AIX-NEXT: bl .__xl_log[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: fmul 0, 31, 1
+; CHECK-AIX-NEXT: L..BB1_1: # %bb2
+; CHECK-AIX-NEXT: #
+; CHECK-AIX-NEXT: stfd 0, 0(3)
+; CHECK-AIX-NEXT: b L..BB1_1
+bb:
+ %i = bitcast double* %arg to i8*
+ %i1 = getelementptr i8, i8* %i, i64 undef
+ br label %bb2
+
+bb2:
+ %i3 = getelementptr inbounds i8, i8* %i1, i64 undef
+ %i4 = bitcast i8* %i3 to double*
+ store double undef, double* %i4, align 8
+ %i5 = getelementptr inbounds i8, i8* %i1, i64 0
+ %i6 = bitcast i8* %i5 to double*
+ %i7 = tail call afn double @llvm.sqrt.f64(double undef)
+ %i8 = fmul afn double undef, 0x401921FB54442D28
+ %i9 = tail call afn double @llvm.log.f64(double %i8) #2
+ %i10 = fmul afn double %i7, %i9
+ store double %i10, double* %i6, align 8
+ br label %bb2
+}
+
+declare double @llvm.sqrt.f64(double)
+declare double @llvm.cos.f64(double)
+declare double @llvm.log.f64(double)
diff --git a/llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll b/llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll
new file mode 100644
index 0000000000000..d89099030c638
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/lower-intrinsics-fast-mass_notail.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -enable-ppc-gen-scalar-mass -O3 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK-LNX %s
+; RUN: llc -enable-ppc-gen-scalar-mass -O3 -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck --check-prefix=CHECK-AIX %s
+
+define void @cos_f64(double* %arg) {
+; CHECK-LNX-LABEL: cos_f64:
+; CHECK-LNX: # %bb.0: # %bb
+; CHECK-LNX-NEXT: mflr 0
+; CHECK-LNX-NEXT: std 0, 16(1)
+; CHECK-LNX-NEXT: stdu 1, -32(1)
+; CHECK-LNX-NEXT: .cfi_def_cfa_offset 32
+; CHECK-LNX-NEXT: .cfi_offset lr, 16
+; CHECK-LNX-NEXT: addis 3, 2, .LCPI0_0@toc@ha
+; CHECK-LNX-NEXT: lfs 1, .LCPI0_0@toc@l(3)
+; CHECK-LNX-NEXT: bl __xl_cos_finite
+; CHECK-LNX-NEXT: nop
+; CHECK-LNX-NEXT: xssqrtdp 0, 0
+; CHECK-LNX-NEXT: xsmuldp 0, 0, 1
+; CHECK-LNX-NEXT: .p2align 4
+; CHECK-LNX-NEXT: .LBB0_1: # %bb2
+; CHECK-LNX-NEXT: #
+; CHECK-LNX-NEXT: stfd 0, 0(3)
+; CHECK-LNX-NEXT: b .LBB0_1
+;
+; CHECK-AIX-LABEL: cos_f64:
+; CHECK-AIX: # %bb.0: # %bb
+; CHECK-AIX-NEXT: mflr 0
+; CHECK-AIX-NEXT: stw 0, 8(1)
+; CHECK-AIX-NEXT: stwu 1, -64(1)
+; CHECK-AIX-NEXT: stfd 31, 56(1) # 8-byte Folded Spill
+; CHECK-AIX-NEXT: bl .sqrt[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: lwz 3, L..C0(2) # %const.0
+; CHECK-AIX-NEXT: fmr 31, 1
+; CHECK-AIX-NEXT: lfs 0, 0(3)
+; CHECK-AIX-NEXT: fmr 1, 0
+; CHECK-AIX-NEXT: bl .__xl_cos_finite[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: fmul 0, 31, 1
+; CHECK-AIX-NEXT: L..BB0_1: # %bb2
+; CHECK-AIX-NEXT: #
+; CHECK-AIX-NEXT: stfd 0, 0(3)
+; CHECK-AIX-NEXT: b L..BB0_1
+bb:
+ %i = bitcast double* %arg to i8*
+ %i1 = getelementptr i8, i8* %i, i64 undef
+ br label %bb2
+
+bb2:
+ %i3 = getelementptr inbounds i8, i8* %i1, i64 undef
+ %i4 = bitcast i8* %i3 to double*
+ store double undef, double* %i4, align 8
+ %i5 = getelementptr inbounds i8, i8* %i1, i64 0
+ %i6 = bitcast i8* %i5 to double*
+ %i7 = tail call fast double @llvm.sqrt.f64(double undef)
+ %i8 = fmul fast double undef, 0x401921FB54442D28
+ %i9 = tail call fast double @llvm.cos.f64(double %i8) #2
+ %i10 = fmul fast double %i7, %i9
+ store double %i10, double* %i6, align 8
+ br label %bb2
+}
+
+define void @log_f64(double* %arg) {
+; CHECK-LNX-LABEL: log_f64:
+; CHECK-LNX: # %bb.0: # %bb
+; CHECK-LNX-NEXT: mflr 0
+; CHECK-LNX-NEXT: std 0, 16(1)
+; CHECK-LNX-NEXT: stdu 1, -32(1)
+; CHECK-LNX-NEXT: .cfi_def_cfa_offset 32
+; CHECK-LNX-NEXT: .cfi_offset lr, 16
+; CHECK-LNX-NEXT: addis 3, 2, .LCPI1_0@toc@ha
+; CHECK-LNX-NEXT: lfs 1, .LCPI1_0@toc@l(3)
+; CHECK-LNX-NEXT: bl __xl_log_finite
+; CHECK-LNX-NEXT: nop
+; CHECK-LNX-NEXT: xssqrtdp 0, 0
+; CHECK-LNX-NEXT: xsmuldp 0, 0, 1
+; CHECK-LNX-NEXT: .p2align 4
+; CHECK-LNX-NEXT: .LBB1_1: # %bb2
+; CHECK-LNX-NEXT: #
+; CHECK-LNX-NEXT: stfd 0, 0(3)
+; CHECK-LNX-NEXT: b .LBB1_1
+;
+; CHECK-AIX-LABEL: log_f64:
+; CHECK-AIX: # %bb.0: # %bb
+; CHECK-AIX-NEXT: mflr 0
+; CHECK-AIX-NEXT: stw 0, 8(1)
+; CHECK-AIX-NEXT: stwu 1, -64(1)
+; CHECK-AIX-NEXT: stfd 31, 56(1) # 8-byte Folded Spill
+; CHECK-AIX-NEXT: bl .sqrt[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: lwz 3, L..C1(2) # %const.0
+; CHECK-AIX-NEXT: fmr 31, 1
+; CHECK-AIX-NEXT: lfs 0, 0(3)
+; CHECK-AIX-NEXT: fmr 1, 0
+; CHECK-AIX-NEXT: bl .__xl_log_finite[PR]
+; CHECK-AIX-NEXT: nop
+; CHECK-AIX-NEXT: fmul 0, 31, 1
+; CHECK-AIX-NEXT: L..BB1_1: # %bb2
+; CHECK-AIX-NEXT: #
+; CHECK-AIX-NEXT: stfd 0, 0(3)
+; CHECK-AIX-NEXT: b L..BB1_1
+bb:
+ %i = bitcast double* %arg to i8*
+ %i1 = getelementptr i8, i8* %i, i64 undef
+ br label %bb2
+
+bb2:
+ %i3 = getelementptr inbounds i8, i8* %i1, i64 undef
+ %i4 = bitcast i8* %i3 to double*
+ store double undef, double* %i4, align 8
+ %i5 = getelementptr inbounds i8, i8* %i1, i64 0
+ %i6 = bitcast i8* %i5 to double*
+ %i7 = tail call fast double @llvm.sqrt.f64(double undef)
+ %i8 = fmul fast double undef, 0x401921FB54442D28
+ %i9 = tail call fast double @llvm.log.f64(double %i8) #2
+ %i10 = fmul fast double %i7, %i9
+ store double %i10, double* %i6, align 8
+ br label %bb2
+}
+
+declare double @llvm.sqrt.f64(double)
+declare double @llvm.cos.f64(double)
+declare double @llvm.log.f64(double)
More information about the llvm-commits
mailing list