[llvm] 781b78a - [PowerPC] Only legalize FNEARBYINT with unsafe fp math

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 2 11:46:40 PST 2020


Author: Nemanja Ivanovic
Date: 2020-01-02T13:45:54-06:00
New Revision: 781b78a3610886e265f08d6de625bfe19e3dd8cf

URL: https://github.com/llvm/llvm-project/commit/781b78a3610886e265f08d6de625bfe19e3dd8cf
DIFF: https://github.com/llvm/llvm-project/commit/781b78a3610886e265f08d6de625bfe19e3dd8cf.diff

LOG: [PowerPC] Only legalize FNEARBYINT with unsafe fp math

Commit 0f0330a78709 legalized these nodes on PPC without taking unsafe math
into account, which means that we get inexact exceptions raised for nearbyint.
Since this doesn't conform to the standard, switch this legalization to depend
on unsafe fp math.

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
    llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9fab29dded75..df2234eea289 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -782,17 +782,22 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       }
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
 
+      // The nearbyint variants are not allowed to raise the inexact exception
+      // so we can only code-gen them with unsafe math.
+      if (TM.Options.UnsafeFPMath) {
+        setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+        setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+      }
+
       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
-      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
       setOperationAction(ISD::FROUND, MVT::f64, Legal);
 
       setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
-      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
       setOperationAction(ISD::FROUND, MVT::f32, Legal);
 
       setOperationAction(ISD::MUL, MVT::v2f64, Legal);

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
index 94625fcf551b..7f18853b8b99 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll
@@ -342,14 +342,14 @@ declare i64 @llvm.llround.i64.f32(float)
 
 define dso_local double @test_nearbyint(double %d) local_unnamed_addr {
 ; BE-LABEL: test_nearbyint:
-; BE:       # %bb.0: # %entry
-; BE-NEXT:    xsrdpic f1, f1
-; BE-NEXT:    blr
+; BE:    # %bb.0: # %entry
+; BE:    bl nearbyint
+; BE:    blr
 ;
 ; CHECK-LABEL: test_nearbyint:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xsrdpic f1, f1
-; CHECK-NEXT:    blr
+; CHECK:    # %bb.0: # %entry
+; CHECK:    bl nearbyint
+; CHECK:    blr
 ;
 ; FAST-LABEL: test_nearbyint:
 ; FAST:       # %bb.0: # %entry
@@ -364,14 +364,14 @@ declare double @llvm.nearbyint.f64(double)
 
 define dso_local float @test_nearbyintf(float %f) local_unnamed_addr {
 ; BE-LABEL: test_nearbyintf:
-; BE:       # %bb.0: # %entry
-; BE-NEXT:    xsrdpic f1, f1
-; BE-NEXT:    blr
+; BE:    # %bb.0: # %entry
+; BE:    bl nearbyint
+; BE:    blr
 ;
 ; CHECK-LABEL: test_nearbyintf:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xsrdpic f1, f1
-; CHECK-NEXT:    blr
+; CHECK:    # %bb.0: # %entry
+; CHECK:    bl nearbyintf
+; CHECK:    blr
 ;
 ; FAST-LABEL: test_nearbyintf:
 ; FAST:       # %bb.0: # %entry

diff  --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
index ecd19fd677c5..253e74cf0bf3 100644
--- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll
@@ -5101,20 +5101,34 @@ entry:
 define <1 x float> @constrained_vector_nearbyint_v1f32() #0 {
 ; PC64LE-LABEL: constrained_vector_nearbyint_v1f32:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -32(1)
 ; PC64LE-NEXT:    addis 3, 2, .LCPI80_0@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI80_0@toc@l(3)
-; PC64LE-NEXT:    xsrdpic 0, 0
-; PC64LE-NEXT:    xscvdpspn 0, 0
+; PC64LE-NEXT:    lfs 1, .LCPI80_0@toc@l(3)
+; PC64LE-NEXT:    bl nearbyintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 1
 ; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE-NEXT:    addi 1, 1, 32
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_nearbyint_v1f32:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -32(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI80_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI80_0@toc@l(3)
-; PC64LE9-NEXT:    xsrdpic 0, 0
-; PC64LE9-NEXT:    xscvdpspn 0, 0
+; PC64LE9-NEXT:    lfs 1, .LCPI80_0@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
 ; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    addi 1, 1, 32
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
 entry:
   %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
@@ -5127,19 +5141,50 @@ entry:
 define <2 x double> @constrained_vector_nearbyint_v2f64() #0 {
 ; PC64LE-LABEL: constrained_vector_nearbyint_v2f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -64(1)
 ; PC64LE-NEXT:    addis 3, 2, .LCPI81_0@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI81_0@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xvrdpic 34, 0
+; PC64LE-NEXT:    lfd 1, .LCPI81_0@toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI81_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI81_1@toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 34, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 64
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_nearbyint_v2f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI81_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI81_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpic 34, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI81_0@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI81_1@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI81_1@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 34, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
 entry:
   %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
@@ -5152,50 +5197,80 @@ entry:
 define <3 x float> @constrained_vector_nearbyint_v3f32() #0 {
 ; PC64LE-LABEL: constrained_vector_nearbyint_v3f32:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI82_2@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI82_1@toc@ha
-; PC64LE-NEXT:    lfs 0, .LCPI82_2@toc@l(3)
-; PC64LE-NEXT:    lfs 1, .LCPI82_1@toc@l(4)
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -48(1)
 ; PC64LE-NEXT:    addis 3, 2, .LCPI82_0@toc@ha
-; PC64LE-NEXT:    xsrdpic 0, 0
-; PC64LE-NEXT:    lfs 2, .LCPI82_0@toc@l(3)
+; PC64LE-NEXT:    lfs 1, .LCPI82_0@toc@l(3)
+; PC64LE-NEXT:    bl nearbyintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI82_1@toc@ha
+; PC64LE-NEXT:    fmr 31, 1
+; PC64LE-NEXT:    lfs 1, .LCPI82_1@toc@l(3)
+; PC64LE-NEXT:    bl nearbyintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    addis 3, 2, .LCPI82_2@toc@ha
+; PC64LE-NEXT:    fmr 30, 1
+; PC64LE-NEXT:    lfs 1, .LCPI82_2@toc@l(3)
+; PC64LE-NEXT:    bl nearbyintf
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xscvdpspn 0, 30
 ; PC64LE-NEXT:    addis 3, 2, .LCPI82_3@toc@ha
-; PC64LE-NEXT:    xsrdpic 1, 1
-; PC64LE-NEXT:    addi 3, 3, .LCPI82_3@toc@l
-; PC64LE-NEXT:    xsrdpic 2, 2
-; PC64LE-NEXT:    xscvdpspn 0, 0
 ; PC64LE-NEXT:    xscvdpspn 1, 1
+; PC64LE-NEXT:    addi 3, 3, .LCPI82_3@toc@l
 ; PC64LE-NEXT:    xxsldwi 34, 0, 0, 1
-; PC64LE-NEXT:    xscvdpspn 0, 2
+; PC64LE-NEXT:    xscvdpspn 0, 31
 ; PC64LE-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE-NEXT:    vmrglw 2, 3, 2
+; PC64LE-NEXT:    vmrglw 2, 2, 3
 ; PC64LE-NEXT:    lvx 3, 0, 3
 ; PC64LE-NEXT:    xxsldwi 36, 0, 0, 1
 ; PC64LE-NEXT:    vperm 2, 4, 2, 3
+; PC64LE-NEXT:    addi 1, 1, 48
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_nearbyint_v3f32:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    stfd 30, -16(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    stfd 31, -8(1) # 8-byte Folded Spill
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -48(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI82_0@toc@ha
-; PC64LE9-NEXT:    lfs 0, .LCPI82_0@toc@l(3)
+; PC64LE9-NEXT:    lfs 1, .LCPI82_0@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyintf
+; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI82_1@toc@ha
+; PC64LE9-NEXT:    fmr 31, 1
 ; PC64LE9-NEXT:    lfs 1, .LCPI82_1@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyintf
+; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI82_2@toc@ha
-; PC64LE9-NEXT:    xsrdpic 0, 0
-; PC64LE9-NEXT:    lfs 2, .LCPI82_2@toc@l(3)
+; PC64LE9-NEXT:    fmr 30, 1
+; PC64LE9-NEXT:    lfs 1, .LCPI82_2@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyintf
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    xscvdpspn 0, 1
+; PC64LE9-NEXT:    xxsldwi 34, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 30
+; PC64LE9-NEXT:    xxsldwi 35, 0, 0, 1
+; PC64LE9-NEXT:    xscvdpspn 0, 31
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI82_3@toc@ha
 ; PC64LE9-NEXT:    addi 3, 3, .LCPI82_3@toc@l
-; PC64LE9-NEXT:    xsrdpic 1, 1
-; PC64LE9-NEXT:    xsrdpic 2, 2
-; PC64LE9-NEXT:    xscvdpspn 0, 0
-; PC64LE9-NEXT:    xscvdpspn 1, 1
-; PC64LE9-NEXT:    xscvdpspn 2, 2
-; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
-; PC64LE9-NEXT:    xxsldwi 35, 1, 1, 1
-; PC64LE9-NEXT:    xxsldwi 34, 2, 2, 1
 ; PC64LE9-NEXT:    vmrglw 2, 3, 2
 ; PC64LE9-NEXT:    lxvx 35, 0, 3
+; PC64LE9-NEXT:    xxsldwi 36, 0, 0, 1
 ; PC64LE9-NEXT:    vperm 2, 4, 2, 3
+; PC64LE9-NEXT:    addi 1, 1, 48
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    lfd 31, -8(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    lfd 30, -16(1) # 8-byte Folded Reload
+; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
 entry:
   %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
@@ -5208,31 +5283,72 @@ entry:
 define <3 x double> @constrained_vector_nearby_v3f64() #0 {
 ; PC64LE-LABEL: constrained_vector_nearby_v3f64:
 ; PC64LE:       # %bb.0: # %entry
-; PC64LE-NEXT:    addis 3, 2, .LCPI83_1@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI83_1@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI83_0@toc@ha
 ; PC64LE-NEXT:    lfd 1, .LCPI83_0@toc@l(3)
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xsrdpic 3, 1
-; PC64LE-NEXT:    xvrdpic 2, 0
-; PC64LE-NEXT:    xxswapd 1, 2
-; PC64LE-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
-; PC64LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI83_1@toc@ha
+; PC64LE-NEXT:    lfs 1, .LCPI83_1@toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI83_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 0, 1
+; PC64LE-NEXT:    lfd 1, .LCPI83_2@toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    xxswapd 0, 63
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    xxlor 2, 63, 63
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    fmr 3, 1
+; PC64LE-NEXT:    fmr 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_nearby_v3f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI83_0@toc@ha
-; PC64LE9-NEXT:    lfd 0, .LCPI83_0@toc@l(3)
+; PC64LE9-NEXT:    lfd 1, .LCPI83_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI83_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI83_1@toc@l
-; PC64LE9-NEXT:    xsrdpic 3, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpic 2, 0
-; PC64LE9-NEXT:    xxswapd 1, 2
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfs 1, .LCPI83_1@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI83_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 0, 1
+; PC64LE9-NEXT:    lfd 1, .LCPI83_2@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    fmr 3, 1
+; PC64LE9-NEXT:    xxswapd 1, 63
+; PC64LE9-NEXT:    xscpsgndp 2, 63, 63
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
 ; PC64LE9-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
-; PC64LE9-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
 entry:
   %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
@@ -5245,28 +5361,86 @@ entry:
 define <4 x double> @constrained_vector_nearbyint_v4f64() #0 {
 ; PC64LE-LABEL: constrained_vector_nearbyint_v4f64:
 ; PC64LE:       # %bb.0: # %entry
+; PC64LE-NEXT:    mflr 0
+; PC64LE-NEXT:    std 0, 16(1)
+; PC64LE-NEXT:    stdu 1, -80(1)
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    stxvd2x 63, 1, 3 # 16-byte Folded Spill
 ; PC64LE-NEXT:    addis 3, 2, .LCPI84_0@toc@ha
-; PC64LE-NEXT:    addis 4, 2, .LCPI84_1@toc@ha
-; PC64LE-NEXT:    addi 3, 3, .LCPI84_0@toc@l
-; PC64LE-NEXT:    lxvd2x 0, 0, 3
-; PC64LE-NEXT:    addi 3, 4, .LCPI84_1@toc@l
-; PC64LE-NEXT:    lxvd2x 1, 0, 3
-; PC64LE-NEXT:    xxswapd 0, 0
-; PC64LE-NEXT:    xxswapd 1, 1
-; PC64LE-NEXT:    xvrdpic 34, 0
-; PC64LE-NEXT:    xvrdpic 35, 1
+; PC64LE-NEXT:    lfd 1, .LCPI84_0@toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI84_1@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI84_1@toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    addis 3, 2, .LCPI84_2@toc@ha
+; PC64LE-NEXT:    xxmrghd 63, 1, 0
+; PC64LE-NEXT:    lfd 1, .LCPI84_2@toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    stxvd2x 1, 1, 3 # 16-byte Folded Spill
+; PC64LE-NEXT:    addis 3, 2, .LCPI84_3@toc@ha
+; PC64LE-NEXT:    lfd 1, .LCPI84_3@toc@l(3)
+; PC64LE-NEXT:    bl nearbyint
+; PC64LE-NEXT:    nop
+; PC64LE-NEXT:    li 3, 48
+; PC64LE-NEXT:    vmr 2, 31
+; PC64LE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE-NEXT:    lxvd2x 0, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    li 3, 64
+; PC64LE-NEXT:    lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; PC64LE-NEXT:    xxmrghd 35, 1, 0
+; PC64LE-NEXT:    addi 1, 1, 80
+; PC64LE-NEXT:    ld 0, 16(1)
+; PC64LE-NEXT:    mtlr 0
 ; PC64LE-NEXT:    blr
 ;
 ; PC64LE9-LABEL: constrained_vector_nearbyint_v4f64:
 ; PC64LE9:       # %bb.0: # %entry
+; PC64LE9-NEXT:    mflr 0
+; PC64LE9-NEXT:    std 0, 16(1)
+; PC64LE9-NEXT:    stdu 1, -64(1)
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI84_0@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI84_0@toc@l
-; PC64LE9-NEXT:    lxvx 0, 0, 3
+; PC64LE9-NEXT:    lfd 1, .LCPI84_0@toc@l(3)
+; PC64LE9-NEXT:    stxv 63, 48(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
 ; PC64LE9-NEXT:    addis 3, 2, .LCPI84_1@toc@ha
-; PC64LE9-NEXT:    addi 3, 3, .LCPI84_1@toc@l
-; PC64LE9-NEXT:    xvrdpic 34, 0
-; PC64LE9-NEXT:    lxvx 0, 0, 3
-; PC64LE9-NEXT:    xvrdpic 35, 0
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI84_1@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    addis 3, 2, .LCPI84_2@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 63, 1, 0
+; PC64LE9-NEXT:    lfd 1, .LCPI84_2@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    addis 3, 2, .LCPI84_3@toc@ha
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    stxv 1, 32(1) # 16-byte Folded Spill
+; PC64LE9-NEXT:    lfd 1, .LCPI84_3@toc@l(3)
+; PC64LE9-NEXT:    bl nearbyint
+; PC64LE9-NEXT:    nop
+; PC64LE9-NEXT:    lxv 0, 32(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    vmr 2, 31
+; PC64LE9-NEXT:    lxv 63, 48(1) # 16-byte Folded Reload
+; PC64LE9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
+; PC64LE9-NEXT:    xxmrghd 35, 1, 0
+; PC64LE9-NEXT:    addi 1, 1, 64
+; PC64LE9-NEXT:    ld 0, 16(1)
+; PC64LE9-NEXT:    mtlr 0
 ; PC64LE9-NEXT:    blr
 entry:
   %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(


        


More information about the llvm-commits mailing list