[llvm] 1f852ba - [PowerPC] Avoid unnecessary fadd for unsigned to ppcf128
Qiu Chaofan via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 1 07:25:32 PST 2020
Author: Qiu Chaofan
Date: 2020-11-01T23:22:47+08:00
New Revision: 1f852ba8534cc773c114165e77acba411ac25a93
URL: https://github.com/llvm/llvm-project/commit/1f852ba8534cc773c114165e77acba411ac25a93
DIFF: https://github.com/llvm/llvm-project/commit/1f852ba8534cc773c114165e77acba411ac25a93.diff
LOG: [PowerPC] Avoid unnecessary fadd for unsigned to ppcf128
Unsigned 32-bit or shorter integer to ppcf128 conversions are currently
expanded as signed-to-double with an extra fadd to 'complement'. But on
PowerPC we have a native instruction to directly convert unsigned to
double since ISA v2.06. This patch exploits it.
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D89786
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
llvm/test/CodeGen/PowerPC/uint-to-ppcfp128-crash.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index b8360290b3ca..1377a25d6a7e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1636,16 +1636,14 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
// though.
if (SrcVT.bitsLE(MVT::i32)) {
// The integer can be represented exactly in an f64.
- Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
- MVT::i32, Src);
Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
APInt(NVT.getSizeInBits(), 0)), dl, NVT);
if (Strict) {
- Hi = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl,
- DAG.getVTList(NVT, MVT::Other), {Chain, Src}, Flags);
+ Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(NVT, MVT::Other),
+ {Chain, Src}, Flags);
Chain = Hi.getValue(1);
} else
- Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src);
+ Hi = DAG.getNode(N->getOpcode(), dl, NVT, Src);
} else {
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (SrcVT.bitsLE(MVT::i64)) {
@@ -1667,7 +1665,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
GetPairElements(Tmp.first, Lo, Hi);
}
- if (isSigned) {
+ // No need to complement for unsigned 32-bit integers
+ if (isSigned || SrcVT.bitsLE(MVT::i32)) {
if (Strict)
ReplaceValueWith(SDValue(N, 1), Chain);
diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
index 5ab12093954f..864a573896b2 100644
--- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
@@ -1683,107 +1683,29 @@ entry:
define ppc_fp128 @u32_to_ppcq(i32 zeroext %m) #0 {
; PC64LE-LABEL: u32_to_ppcq:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mflr 0
-; PC64LE-NEXT: std 30, -24(1) # 8-byte Folded Spill
-; PC64LE-NEXT: stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE-NEXT: std 0, 16(1)
-; PC64LE-NEXT: stdu 1, -64(1)
-; PC64LE-NEXT: mr 30, 3
-; PC64LE-NEXT: addis 3, 2, .LCPI35_0 at toc@ha
-; PC64LE-NEXT: xxlxor 2, 2, 2
-; PC64LE-NEXT: mtfprwa 0, 30
-; PC64LE-NEXT: lfs 3, .LCPI35_0 at toc@l(3)
-; PC64LE-NEXT: xxlxor 4, 4, 4
-; PC64LE-NEXT: xscvsxddp 31, 0
-; PC64LE-NEXT: fmr 1, 31
-; PC64LE-NEXT: bl __gcc_qadd
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: cmpwi 30, 0
-; PC64LE-NEXT: blt 0, .LBB35_2
-; PC64LE-NEXT: # %bb.1: # %entry
-; PC64LE-NEXT: fmr 1, 31
-; PC64LE-NEXT: .LBB35_2: # %entry
-; PC64LE-NEXT: blt 0, .LBB35_4
-; PC64LE-NEXT: # %bb.3: # %entry
+; PC64LE-NEXT: mtfprwz 0, 3
; PC64LE-NEXT: xxlxor 2, 2, 2
-; PC64LE-NEXT: .LBB35_4: # %entry
-; PC64LE-NEXT: addi 1, 1, 64
-; PC64LE-NEXT: ld 0, 16(1)
-; PC64LE-NEXT: lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE-NEXT: ld 30, -24(1) # 8-byte Folded Reload
-; PC64LE-NEXT: mtlr 0
+; PC64LE-NEXT: xscvuxddp 1, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: u32_to_ppcq:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mflr 0
-; PC64LE9-NEXT: std 30, -24(1) # 8-byte Folded Spill
-; PC64LE9-NEXT: stfd 31, -8(1) # 8-byte Folded Spill
-; PC64LE9-NEXT: std 0, 16(1)
-; PC64LE9-NEXT: stdu 1, -64(1)
-; PC64LE9-NEXT: mr 30, 3
-; PC64LE9-NEXT: addis 3, 2, .LCPI35_0 at toc@ha
+; PC64LE9-NEXT: mtfprwz 0, 3
; PC64LE9-NEXT: xxlxor 2, 2, 2
-; PC64LE9-NEXT: mtfprwa 0, 30
-; PC64LE9-NEXT: lfs 3, .LCPI35_0 at toc@l(3)
-; PC64LE9-NEXT: xscvsxddp 31, 0
-; PC64LE9-NEXT: xxlxor 4, 4, 4
-; PC64LE9-NEXT: fmr 1, 31
-; PC64LE9-NEXT: bl __gcc_qadd
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: cmpwi 30, 0
-; PC64LE9-NEXT: blt 0, .LBB35_2
-; PC64LE9-NEXT: # %bb.1: # %entry
-; PC64LE9-NEXT: fmr 1, 31
-; PC64LE9-NEXT: .LBB35_2: # %entry
-; PC64LE9-NEXT: blt 0, .LBB35_4
-; PC64LE9-NEXT: # %bb.3: # %entry
-; PC64LE9-NEXT: xxlxor 2, 2, 2
-; PC64LE9-NEXT: .LBB35_4: # %entry
-; PC64LE9-NEXT: addi 1, 1, 64
-; PC64LE9-NEXT: ld 0, 16(1)
-; PC64LE9-NEXT: lfd 31, -8(1) # 8-byte Folded Reload
-; PC64LE9-NEXT: ld 30, -24(1) # 8-byte Folded Reload
-; PC64LE9-NEXT: mtlr 0
+; PC64LE9-NEXT: xscvuxddp 1, 0
; PC64LE9-NEXT: blr
;
; PC64-LABEL: u32_to_ppcq:
; PC64: # %bb.0: # %entry
-; PC64-NEXT: mflr 0
-; PC64-NEXT: std 0, 16(1)
-; PC64-NEXT: stdu 1, -160(1)
-; PC64-NEXT: std 30, 128(1) # 8-byte Folded Spill
-; PC64-NEXT: mr 30, 3
-; PC64-NEXT: extsw 3, 3
-; PC64-NEXT: std 3, 120(1)
+; PC64-NEXT: lis 4, 17200
+; PC64-NEXT: stw 3, -4(1)
; PC64-NEXT: addis 3, 2, .LCPI35_0 at toc@ha
-; PC64-NEXT: stfd 31, 152(1) # 8-byte Folded Spill
-; PC64-NEXT: lfd 0, 120(1)
-; PC64-NEXT: lfs 3, .LCPI35_0 at toc@l(3)
+; PC64-NEXT: stw 4, -8(1)
+; PC64-NEXT: lfs 0, .LCPI35_0 at toc@l(3)
; PC64-NEXT: addis 3, 2, .LCPI35_1 at toc@ha
-; PC64-NEXT: lfs 31, .LCPI35_1 at toc@l(3)
-; PC64-NEXT: stfd 30, 144(1) # 8-byte Folded Spill
-; PC64-NEXT: fcfid 30, 0
-; PC64-NEXT: fmr 1, 30
-; PC64-NEXT: fmr 2, 31
-; PC64-NEXT: fmr 4, 31
-; PC64-NEXT: bl __gcc_qadd
-; PC64-NEXT: nop
-; PC64-NEXT: cmpwi 30, 0
-; PC64-NEXT: blt 0, .LBB35_2
-; PC64-NEXT: # %bb.1: # %entry
-; PC64-NEXT: fmr 1, 30
-; PC64-NEXT: .LBB35_2: # %entry
-; PC64-NEXT: blt 0, .LBB35_4
-; PC64-NEXT: # %bb.3: # %entry
-; PC64-NEXT: fmr 2, 31
-; PC64-NEXT: .LBB35_4: # %entry
-; PC64-NEXT: lfd 31, 152(1) # 8-byte Folded Reload
-; PC64-NEXT: ld 30, 128(1) # 8-byte Folded Reload
-; PC64-NEXT: lfd 30, 144(1) # 8-byte Folded Reload
-; PC64-NEXT: addi 1, 1, 160
-; PC64-NEXT: ld 0, 16(1)
-; PC64-NEXT: mtlr 0
+; PC64-NEXT: lfd 1, -8(1)
+; PC64-NEXT: lfs 2, .LCPI35_1 at toc@l(3)
+; PC64-NEXT: fsub 1, 1, 0
; PC64-NEXT: blr
entry:
%conv = tail call ppc_fp128 @llvm.experimental.constrained.uitofp.ppcf128.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #1
diff --git a/llvm/test/CodeGen/PowerPC/uint-to-ppcfp128-crash.ll b/llvm/test/CodeGen/PowerPC/uint-to-ppcfp128-crash.ll
index bb15367e43d9..04e4c0f21262 100644
--- a/llvm/test/CodeGen/PowerPC/uint-to-ppcfp128-crash.ll
+++ b/llvm/test/CodeGen/PowerPC/uint-to-ppcfp128-crash.ll
@@ -1,15 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mcpu=pwr9 \
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
; Ensure we don't crash by trying to convert directly from a subword load
; to a ppc_fp128 as we do for conversions to f32/f64.
define ppc_fp128 @test(i16* nocapture readonly %Ptr) {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lhz 3, 0(3)
+; CHECK-NEXT: xxlxor 2, 2, 2
+; CHECK-NEXT: stw 3, -4(1)
+; CHECK-NEXT: addi 3, 1, -4
+; CHECK-NEXT: lfiwzx 0, 0, 3
+; CHECK-NEXT: xscvuxddp 1, 0
+; CHECK-NEXT: blr
entry:
%0 = load i16, i16* %Ptr, align 2
%conv = uitofp i16 %0 to ppc_fp128
ret ppc_fp128 %conv
-; CHECK: lhz [[LD:[0-9]+]], 0(3)
-; CHECK: mtfprwa [[MV:[0-9]+]], [[LD]]
-; CHECK: xscvsxddp [[CONV:[0-9]+]], [[MV]]
-; CHECK: bl __gcc_qadd
}
More information about the llvm-commits
mailing list