[llvm] [SystemZ] Modify tests for constrained rounding functions (PR #116952)

Serge Pavlov via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 20 05:42:54 PST 2024


https://github.com/spavloff updated https://github.com/llvm/llvm-project/pull/116952

>From 557b7983df636f30b7b78d4db60adc895d860f71 Mon Sep 17 00:00:00 2001
From: Serge Pavlov <sepavloff at gmail.com>
Date: Wed, 20 Nov 2024 11:39:49 +0700
Subject: [PATCH] [SystemZ] Modify tests for constrained rounding functions

The existing tests for constrained functions often use constant
arguments. If constant evaluation is enhanced, such tests will not check
code generation of the tested functions. To avoid this, the tests are
modified to use loaded values instead of constants. Now only the tests
for rounding functions are changed.
---
 .../vector-constrained-fp-intrinsics.ll       | 654 +++++++++---------
 1 file changed, 327 insertions(+), 327 deletions(-)

diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
index edf818ab95131c..b08f0e5a74d569 100644
--- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -4156,86 +4156,81 @@ entry:
   ret <4 x double> %log2
 }
 
-define <1 x float> @constrained_vector_rint_v1f32() #0 {
+define <1 x float> @constrained_vector_rint_v1f32(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_rint_v1f32:
 ; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI75_0
-; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    le %f0, 0(%r2)
 ; S390X-NEXT:    fiebr %f0, 0, %f0
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_rint_v1f32:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI75_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    lde %f0, 0(%r2)
 ; SZ13-NEXT:    fiebr %f0, 0, %f0
 ; SZ13-NEXT:    vlr %v24, %v0
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <1 x float>, ptr %a
   %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
-                             <1 x float> <float 42.0>,
+                             <1 x float> %b,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict") #0
   ret <1 x float> %rint
 }
 
-define <2 x double> @constrained_vector_rint_v2f64() #0 {
+define <2 x double> @constrained_vector_rint_v2f64(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_rint_v2f64:
 ; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI76_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI76_1
-; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ld %f0, 8(%r2)
+; S390X-NEXT:    ld %f1, 0(%r2)
 ; S390X-NEXT:    fidbr %f2, 0, %f0
 ; S390X-NEXT:    fidbr %f0, 0, %f1
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_rint_v2f64:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI76_0
-; SZ13-NEXT:    vl %v0, 0(%r1), 3
+; SZ13-NEXT:    vl %v0, 0(%r2), 3
 ; SZ13-NEXT:    vfidb %v24, %v0, 0, 0
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <2 x double>, ptr %a
   %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
-                        <2 x double> <double 42.1, double 42.0>,
+                        <2 x double> %b,
                         metadata !"round.dynamic",
                         metadata !"fpexcept.strict") #0
   ret <2 x double> %rint
 }
 
-define <3 x float> @constrained_vector_rint_v3f32() #0 {
+define <3 x float> @constrained_vector_rint_v3f32(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_rint_v3f32:
 ; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI77_0
-; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI77_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI77_2
-; S390X-NEXT:    le %f3, 0(%r1)
+; S390X-NEXT:    lg %r0, 0(%r2)
+; S390X-NEXT:    risbg %r1, %r0, 0, 159, 0
+; S390X-NEXT:    le %f0, 8(%r2)
+; S390X-NEXT:    ldgr %f1, %r1
+; S390X-NEXT:    sllg %r0, %r0, 32
+; S390X-NEXT:    ldgr %f2, %r0
 ; S390X-NEXT:    fiebr %f4, 0, %f0
-; S390X-NEXT:    fiebr %f2, 0, %f1
-; S390X-NEXT:    fiebr %f0, 0, %f3
+; S390X-NEXT:    fiebr %f2, 0, %f2
+; S390X-NEXT:    fiebr %f0, 0, %f1
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_rint_v3f32:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI77_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI77_1
-; SZ13-NEXT:    lde %f1, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI77_2
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    fiebr %f0, 0, %f0
+; SZ13-NEXT:    vl %v0, 0(%r2), 4
+; SZ13-NEXT:    vrepf %v1, %v0, 2
+; SZ13-NEXT:    vrepf %v2, %v0, 1
 ; SZ13-NEXT:    fiebr %f1, 0, %f1
 ; SZ13-NEXT:    fiebr %f2, 0, %f2
-; SZ13-NEXT:    vmrhf %v1, %v1, %v2
-; SZ13-NEXT:    vrepf %v0, %v0, 0
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    fiebr %f0, 0, %f0
+; SZ13-NEXT:    vmrhf %v0, %v0, %v2
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
 ; SZ13-NEXT:    br %r14
  entry:
+  %b = load <3 x float>, ptr %a
   %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              <3 x float> %b,
                               metadata !"round.dynamic",
                               metadata !"fpexcept.strict") #0
   ret <3 x float> %rint
@@ -4274,17 +4269,13 @@ entry:
   ret void
 }
 
-define <4 x double> @constrained_vector_rint_v4f64() #0 {
+define <4 x double> @constrained_vector_rint_v4f64(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_rint_v4f64:
 ; S390X:       # %bb.0: # %entry
-; S390X-NEXT:    larl %r1, .LCPI79_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI79_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI79_2
-; S390X-NEXT:    ld %f2, 0(%r1)
-; S390X-NEXT:    larl %r1, .LCPI79_3
-; S390X-NEXT:    ld %f3, 0(%r1)
+; S390X-NEXT:    ld %f0, 24(%r2)
+; S390X-NEXT:    ld %f1, 16(%r2)
+; S390X-NEXT:    ld %f2, 8(%r2)
+; S390X-NEXT:    ld %f3, 0(%r2)
 ; S390X-NEXT:    fidbr %f6, 0, %f0
 ; S390X-NEXT:    fidbr %f4, 0, %f1
 ; S390X-NEXT:    fidbr %f2, 0, %f2
@@ -4293,23 +4284,21 @@ define <4 x double> @constrained_vector_rint_v4f64() #0 {
 ;
 ; SZ13-LABEL: constrained_vector_rint_v4f64:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI79_0
-; SZ13-NEXT:    vl %v0, 0(%r1), 3
-; SZ13-NEXT:    larl %r1, .LCPI79_1
-; SZ13-NEXT:    vfidb %v24, %v0, 0, 0
-; SZ13-NEXT:    vl %v0, 0(%r1), 3
+; SZ13-NEXT:    vl %v0, 16(%r2), 4
+; SZ13-NEXT:    vl %v1, 0(%r2), 4
+; SZ13-NEXT:    vfidb %v24, %v1, 0, 0
 ; SZ13-NEXT:    vfidb %v26, %v0, 0, 0
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <4 x double>, ptr %a
   %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
-                        <4 x double> <double 42.1, double 42.2,
-                                      double 42.3, double 42.4>,
+                        <4 x double> %b,
                         metadata !"round.dynamic",
                         metadata !"fpexcept.strict") #0
   ret <4 x double> %rint
 }
 
-define <1 x float> @constrained_vector_nearbyint_v1f32() #0 {
+define <1 x float> @constrained_vector_nearbyint_v1f32(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_nearbyint_v1f32:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
@@ -4317,114 +4306,115 @@ define <1 x float> @constrained_vector_nearbyint_v1f32() #0 {
 ; S390X-NEXT:    .cfi_offset %r15, -40
 ; S390X-NEXT:    aghi %r15, -160
 ; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI80_0
-; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    le %f0, 0(%r2)
 ; S390X-NEXT:    brasl %r14, nearbyintf at PLT
 ; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_nearbyint_v1f32:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI80_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    lde %f0, 0(%r2)
 ; SZ13-NEXT:    fiebra %f0, 0, %f0, 4
 ; SZ13-NEXT:    vlr %v24, %v0
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <1 x float>, ptr %a
   %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
-                               <1 x float> <float 42.0>,
+                               <1 x float> %b,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict") #0
   ret <1 x float> %nearby
 }
 
-define <2 x double> @constrained_vector_nearbyint_v2f64() #0 {
+define <2 x double> @constrained_vector_nearbyint_v2f64(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_nearbyint_v2f64:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
 ; S390X-NEXT:    .cfi_offset %r14, -48
 ; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
 ; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI81_0
-; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    ld %f0, 8(%r2)
+; S390X-NEXT:    ld %f8, 0(%r2)
 ; S390X-NEXT:    brasl %r14, nearbyint at PLT
-; S390X-NEXT:    larl %r1, .LCPI81_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
 ; S390X-NEXT:    brasl %r14, nearbyint at PLT
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_nearbyint_v2f64:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI81_0
-; SZ13-NEXT:    vl %v0, 0(%r1), 3
+; SZ13-NEXT:    vl %v0, 0(%r2), 3
 ; SZ13-NEXT:    vfidb %v24, %v0, 4, 0
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <2 x double>, ptr %a
   %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
-                                <2 x double> <double 42.1, double 42.0>,
+                                <2 x double> %b,
                                 metadata !"round.dynamic",
                                 metadata !"fpexcept.strict") #0
   ret <2 x double> %nearby
 }
 
-define <3 x float> @constrained_vector_nearbyint_v3f32() #0 {
+define <3 x float> @constrained_vector_nearbyint_v3f32(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_nearbyint_v3f32:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
 ; S390X-NEXT:    .cfi_offset %r14, -48
 ; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
 ; S390X-NEXT:    .cfi_offset %f8, -168
 ; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI82_0
-; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lg %r0, 0(%r2)
+; S390X-NEXT:    le %f0, 8(%r2)
+; S390X-NEXT:    risbg %r1, %r0, 0, 159, 0
+; S390X-NEXT:    ldgr %f8, %r1
+; S390X-NEXT:    sllg %r0, %r0, 32
+; S390X-NEXT:    ldgr %f9, %r0
 ; S390X-NEXT:    brasl %r14, nearbyintf at PLT
-; S390X-NEXT:    larl %r1, .LCPI82_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f9
 ; S390X-NEXT:    brasl %r14, nearbyintf at PLT
-; S390X-NEXT:    larl %r1, .LCPI82_2
-; S390X-NEXT:    le %f1, 0(%r1)
 ; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f0, %f8
 ; S390X-NEXT:    brasl %r14, nearbyintf at PLT
 ; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ler %f4, %f8
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    ler %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_nearbyint_v3f32:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI82_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI82_1
-; SZ13-NEXT:    lde %f1, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI82_2
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    fiebra %f0, 0, %f0, 4
+; SZ13-NEXT:    vl %v0, 0(%r2), 4
+; SZ13-NEXT:    vrepf %v1, %v0, 2
+; SZ13-NEXT:    vrepf %v2, %v0, 1
 ; SZ13-NEXT:    fiebra %f1, 0, %f1, 4
 ; SZ13-NEXT:    fiebra %f2, 0, %f2, 4
-; SZ13-NEXT:    vmrhf %v1, %v1, %v2
-; SZ13-NEXT:    vrepf %v0, %v0, 0
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    fiebra %f0, 0, %f0, 4
+; SZ13-NEXT:    vmrhf %v0, %v0, %v2
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <3 x float>, ptr %a
   %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
-                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              <3 x float> %b,
                               metadata !"round.dynamic",
                               metadata !"fpexcept.strict") #0
   ret <3 x float> %nearby
@@ -4484,60 +4474,57 @@ entry:
   ret void
 }
 
-define <4 x double> @constrained_vector_nearbyint_v4f64() #0 {
+define <4 x double> @constrained_vector_nearbyint_v4f64(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_nearbyint_v4f64:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
 ; S390X-NEXT:    .cfi_offset %r14, -48
 ; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -184
-; S390X-NEXT:    .cfi_def_cfa_offset 344
-; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    aghi %r15, -192
+; S390X-NEXT:    .cfi_def_cfa_offset 352
+; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
 ; S390X-NEXT:    .cfi_offset %f8, -168
 ; S390X-NEXT:    .cfi_offset %f9, -176
 ; S390X-NEXT:    .cfi_offset %f10, -184
-; S390X-NEXT:    larl %r1, .LCPI84_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    brasl %r14, nearbyint at PLT
-; S390X-NEXT:    larl %r1, .LCPI84_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    .cfi_offset %f11, -192
+; S390X-NEXT:    ld %f8, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    ld %f0, 24(%r2)
+; S390X-NEXT:    ld %f10, 16(%r2)
 ; S390X-NEXT:    brasl %r14, nearbyint at PLT
-; S390X-NEXT:    larl %r1, .LCPI84_2
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f9, %f0
-; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f11, %f0
+; S390X-NEXT:    ldr %f0, %f10
 ; S390X-NEXT:    brasl %r14, nearbyint at PLT
-; S390X-NEXT:    larl %r1, .LCPI84_3
-; S390X-NEXT:    ld %f1, 0(%r1)
 ; S390X-NEXT:    ldr %f10, %f0
-; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f0, %f9
 ; S390X-NEXT:    brasl %r14, nearbyint at PLT
-; S390X-NEXT:    ldr %f2, %f10
-; S390X-NEXT:    ldr %f4, %f9
-; S390X-NEXT:    ldr %f6, %f8
-; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, nearbyint at PLT
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ldr %f6, %f11
+; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 304(%r15)
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_nearbyint_v4f64:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI84_0
-; SZ13-NEXT:    vl %v0, 0(%r1), 3
-; SZ13-NEXT:    larl %r1, .LCPI84_1
-; SZ13-NEXT:    vfidb %v24, %v0, 4, 0
-; SZ13-NEXT:    vl %v0, 0(%r1), 3
+; SZ13-NEXT:    vl %v0, 16(%r2), 4
+; SZ13-NEXT:    vl %v1, 0(%r2), 4
+; SZ13-NEXT:    vfidb %v24, %v1, 4, 0
 ; SZ13-NEXT:    vfidb %v26, %v0, 4, 0
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <4 x double>, ptr %a
   %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
-                                <4 x double> <double 42.1, double 42.2,
-                                              double 42.3, double 42.4>,
+                                <4 x double> %b,
                                 metadata !"round.dynamic",
                                 metadata !"fpexcept.strict") #0
   ret <4 x double> %nearby
@@ -5544,7 +5531,7 @@ entry:
   ret <4 x double> %result
 }
 
-define <1 x float> @constrained_vector_ceil_v1f32() #0 {
+define <1 x float> @constrained_vector_ceil_v1f32(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_ceil_v1f32:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
@@ -5565,97 +5552,101 @@ define <1 x float> @constrained_vector_ceil_v1f32() #0 {
 ; SZ13-NEXT:    vlr %v24, %v0
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <1 x float>, ptr %a
   %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
                                <1 x float> <float 1.5>,
                                metadata !"fpexcept.strict") #0
   ret <1 x float> %ceil
 }
 
-define <2 x double> @constrained_vector_ceil_v2f64() #0 {
+define <2 x double> @constrained_vector_ceil_v2f64(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_ceil_v2f64:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
 ; S390X-NEXT:    .cfi_offset %r14, -48
 ; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
 ; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI104_0
-; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    ld %f0, 8(%r2)
+; S390X-NEXT:    ld %f8, 0(%r2)
 ; S390X-NEXT:    brasl %r14, ceil at PLT
-; S390X-NEXT:    larl %r1, .LCPI104_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
 ; S390X-NEXT:    brasl %r14, ceil at PLT
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_ceil_v2f64:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI104_0
-; SZ13-NEXT:    vl %v0, 0(%r1), 3
+; SZ13-NEXT:    vl %v0, 0(%r2), 3
 ; SZ13-NEXT:    vfidb %v24, %v0, 4, 6
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <2 x double>, ptr %a
   %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
+                                <2 x double> %b,
                                 metadata !"fpexcept.strict") #0
   ret <2 x double> %ceil
 }
 
-define <3 x float> @constrained_vector_ceil_v3f32() #0 {
+define <3 x float> @constrained_vector_ceil_v3f32(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_ceil_v3f32:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
 ; S390X-NEXT:    .cfi_offset %r14, -48
 ; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
 ; S390X-NEXT:    .cfi_offset %f8, -168
 ; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI105_0
-; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lg %r0, 0(%r2)
+; S390X-NEXT:    le %f0, 8(%r2)
+; S390X-NEXT:    risbg %r1, %r0, 0, 159, 0
+; S390X-NEXT:    ldgr %f8, %r1
+; S390X-NEXT:    sllg %r0, %r0, 32
+; S390X-NEXT:    ldgr %f9, %r0
 ; S390X-NEXT:    brasl %r14, ceilf at PLT
-; S390X-NEXT:    larl %r1, .LCPI105_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f9
 ; S390X-NEXT:    brasl %r14, ceilf at PLT
-; S390X-NEXT:    larl %r1, .LCPI105_2
-; S390X-NEXT:    le %f1, 0(%r1)
 ; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f0, %f8
 ; S390X-NEXT:    brasl %r14, ceilf at PLT
 ; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ler %f4, %f8
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    ler %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_ceil_v3f32:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI105_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI105_1
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    vgmf %v1, 2, 9
-; SZ13-NEXT:    fiebra %f0, 6, %f0, 4
+; SZ13-NEXT:    vl %v0, 0(%r2), 4
+; SZ13-NEXT:    vrepf %v1, %v0, 2
+; SZ13-NEXT:    vrepf %v2, %v0, 1
 ; SZ13-NEXT:    fiebra %f1, 6, %f1, 4
 ; SZ13-NEXT:    fiebra %f2, 6, %f2, 4
-; SZ13-NEXT:    vmrhf %v1, %v1, %v2
-; SZ13-NEXT:    vrepf %v0, %v0, 0
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    fiebra %f0, 6, %f0, 4
+; SZ13-NEXT:    vmrhf %v0, %v0, %v2
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <3 x float>, ptr %a
   %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              <3 x float> %b,
                               metadata !"fpexcept.strict") #0
   ret <3 x float> %ceil
 }
@@ -5713,7 +5704,7 @@ entry:
   ret void
 }
 
-define <1 x float> @constrained_vector_floor_v1f32() #0 {
+define <1 x float> @constrained_vector_floor_v1f32(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_floor_v1f32:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
@@ -5721,111 +5712,114 @@ define <1 x float> @constrained_vector_floor_v1f32() #0 {
 ; S390X-NEXT:    .cfi_offset %r15, -40
 ; S390X-NEXT:    aghi %r15, -160
 ; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI107_0
-; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    le %f0, 0(%r2)
 ; S390X-NEXT:    brasl %r14, floorf at PLT
 ; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_floor_v1f32:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgmf %v0, 2, 9
+; SZ13-NEXT:    lde %f0, 0(%r2)
 ; SZ13-NEXT:    fiebra %f0, 7, %f0, 4
 ; SZ13-NEXT:    vlr %v24, %v0
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <1 x float>, ptr %a
   %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
-                               <1 x float> <float 1.5>,
+                               <1 x float> %b,
                                metadata !"fpexcept.strict") #0
   ret <1 x float> %floor
 }
 
 
-define <2 x double> @constrained_vector_floor_v2f64() #0 {
+define <2 x double> @constrained_vector_floor_v2f64(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_floor_v2f64:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
 ; S390X-NEXT:    .cfi_offset %r14, -48
 ; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
 ; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI108_0
-; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    ld %f0, 8(%r2)
+; S390X-NEXT:    ld %f8, 0(%r2)
 ; S390X-NEXT:    brasl %r14, floor at PLT
-; S390X-NEXT:    larl %r1, .LCPI108_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
 ; S390X-NEXT:    brasl %r14, floor at PLT
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_floor_v2f64:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI108_0
-; SZ13-NEXT:    vl %v0, 0(%r1), 3
+; SZ13-NEXT:    vl %v0, 0(%r2), 3
 ; SZ13-NEXT:    vfidb %v24, %v0, 4, 7
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <2 x double>, ptr %a
   %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
+                                <2 x double> %b,
                                 metadata !"fpexcept.strict") #0
   ret <2 x double> %floor
 }
 
-define <3 x float> @constrained_vector_floor_v3f32() #0 {
+define <3 x float> @constrained_vector_floor_v3f32(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_floor_v3f32:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
 ; S390X-NEXT:    .cfi_offset %r14, -48
 ; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
 ; S390X-NEXT:    .cfi_offset %f8, -168
 ; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI109_0
-; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lg %r0, 0(%r2)
+; S390X-NEXT:    le %f0, 8(%r2)
+; S390X-NEXT:    risbg %r1, %r0, 0, 159, 0
+; S390X-NEXT:    ldgr %f8, %r1
+; S390X-NEXT:    sllg %r0, %r0, 32
+; S390X-NEXT:    ldgr %f9, %r0
 ; S390X-NEXT:    brasl %r14, floorf at PLT
-; S390X-NEXT:    larl %r1, .LCPI109_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f9
 ; S390X-NEXT:    brasl %r14, floorf at PLT
-; S390X-NEXT:    larl %r1, .LCPI109_2
-; S390X-NEXT:    le %f1, 0(%r1)
 ; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f0, %f8
 ; S390X-NEXT:    brasl %r14, floorf at PLT
 ; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ler %f4, %f8
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    ler %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_floor_v3f32:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI109_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI109_1
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    vgmf %v1, 2, 9
-; SZ13-NEXT:    fiebra %f0, 7, %f0, 4
+; SZ13-NEXT:    vl %v0, 0(%r2), 4
+; SZ13-NEXT:    vrepf %v1, %v0, 2
+; SZ13-NEXT:    vrepf %v2, %v0, 1
 ; SZ13-NEXT:    fiebra %f1, 7, %f1, 4
 ; SZ13-NEXT:    fiebra %f2, 7, %f2, 4
-; SZ13-NEXT:    vmrhf %v1, %v1, %v2
-; SZ13-NEXT:    vrepf %v0, %v0, 0
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    fiebra %f0, 7, %f0, 4
+; SZ13-NEXT:    vmrhf %v0, %v0, %v2
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <3 x float>, ptr %a
   %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              <3 x float> %b,
                               metadata !"fpexcept.strict") #0
   ret <3 x float> %floor
 }
@@ -5883,7 +5877,7 @@ entry:
   ret void
 }
 
-define <1 x float> @constrained_vector_round_v1f32() #0 {
+define <1 x float> @constrained_vector_round_v1f32(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_round_v1f32:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
@@ -5891,110 +5885,113 @@ define <1 x float> @constrained_vector_round_v1f32() #0 {
 ; S390X-NEXT:    .cfi_offset %r15, -40
 ; S390X-NEXT:    aghi %r15, -160
 ; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI111_0
-; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    le %f0, 0(%r2)
 ; S390X-NEXT:    brasl %r14, roundf at PLT
 ; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_round_v1f32:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgmf %v0, 2, 9
+; SZ13-NEXT:    lde %f0, 0(%r2)
 ; SZ13-NEXT:    fiebra %f0, 1, %f0, 4
 ; SZ13-NEXT:    vlr %v24, %v0
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <1 x float>, ptr %a
   %round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
-                               <1 x float> <float 1.5>,
+                               <1 x float> %b,
                                metadata !"fpexcept.strict") #0
   ret <1 x float> %round
 }
 
-define <2 x double> @constrained_vector_round_v2f64() #0 {
+define <2 x double> @constrained_vector_round_v2f64(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_round_v2f64:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
 ; S390X-NEXT:    .cfi_offset %r14, -48
 ; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
 ; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI112_0
-; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    ld %f0, 8(%r2)
+; S390X-NEXT:    ld %f8, 0(%r2)
 ; S390X-NEXT:    brasl %r14, round at PLT
-; S390X-NEXT:    larl %r1, .LCPI112_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
 ; S390X-NEXT:    brasl %r14, round at PLT
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_round_v2f64:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI112_0
-; SZ13-NEXT:    vl %v0, 0(%r1), 3
+; SZ13-NEXT:    vl %v0, 0(%r2), 3
 ; SZ13-NEXT:    vfidb %v24, %v0, 4, 1
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <2 x double>, ptr %a
   %round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
+                                <2 x double> %b,
                                 metadata !"fpexcept.strict") #0
   ret <2 x double> %round
 }
 
-define <3 x float> @constrained_vector_round_v3f32() #0 {
+define <3 x float> @constrained_vector_round_v3f32(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_round_v3f32:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
 ; S390X-NEXT:    .cfi_offset %r14, -48
 ; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
 ; S390X-NEXT:    .cfi_offset %f8, -168
 ; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI113_0
-; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lg %r0, 0(%r2)
+; S390X-NEXT:    le %f0, 8(%r2)
+; S390X-NEXT:    risbg %r1, %r0, 0, 159, 0
+; S390X-NEXT:    ldgr %f8, %r1
+; S390X-NEXT:    sllg %r0, %r0, 32
+; S390X-NEXT:    ldgr %f9, %r0
 ; S390X-NEXT:    brasl %r14, roundf at PLT
-; S390X-NEXT:    larl %r1, .LCPI113_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f9
 ; S390X-NEXT:    brasl %r14, roundf at PLT
-; S390X-NEXT:    larl %r1, .LCPI113_2
-; S390X-NEXT:    le %f1, 0(%r1)
 ; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f0, %f8
 ; S390X-NEXT:    brasl %r14, roundf at PLT
 ; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ler %f4, %f8
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    ler %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_round_v3f32:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI113_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI113_1
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    vgmf %v1, 2, 9
-; SZ13-NEXT:    fiebra %f0, 1, %f0, 4
+; SZ13-NEXT:    vl %v0, 0(%r2), 4
+; SZ13-NEXT:    vrepf %v1, %v0, 2
+; SZ13-NEXT:    vrepf %v2, %v0, 1
 ; SZ13-NEXT:    fiebra %f1, 1, %f1, 4
 ; SZ13-NEXT:    fiebra %f2, 1, %f2, 4
-; SZ13-NEXT:    vmrhf %v1, %v1, %v2
-; SZ13-NEXT:    vrepf %v0, %v0, 0
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    fiebra %f0, 1, %f0, 4
+; SZ13-NEXT:    vmrhf %v0, %v0, %v2
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <3 x float>, ptr %a
   %round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              <3 x float> %b,
                               metadata !"fpexcept.strict") #0
   ret <3 x float> %round
 }
@@ -6053,7 +6050,7 @@ entry:
   ret void
 }
 
-define <1 x float> @constrained_vector_trunc_v1f32() #0 {
+define <1 x float> @constrained_vector_trunc_v1f32(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_trunc_v1f32:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
@@ -6061,110 +6058,113 @@ define <1 x float> @constrained_vector_trunc_v1f32() #0 {
 ; S390X-NEXT:    .cfi_offset %r15, -40
 ; S390X-NEXT:    aghi %r15, -160
 ; S390X-NEXT:    .cfi_def_cfa_offset 320
-; S390X-NEXT:    larl %r1, .LCPI115_0
-; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    le %f0, 0(%r2)
 ; S390X-NEXT:    brasl %r14, truncf at PLT
 ; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_trunc_v1f32:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    vgmf %v0, 2, 9
+; SZ13-NEXT:    lde %f0, 0(%r2)
 ; SZ13-NEXT:    fiebra %f0, 5, %f0, 4
 ; SZ13-NEXT:    vlr %v24, %v0
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <1 x float>, ptr %a
   %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
-                               <1 x float> <float 1.5>,
+                               <1 x float> %b,
                                metadata !"fpexcept.strict") #0
   ret <1 x float> %trunc
 }
 
-define <2 x double> @constrained_vector_trunc_v2f64() #0 {
+define <2 x double> @constrained_vector_trunc_v2f64(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_trunc_v2f64:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
 ; S390X-NEXT:    .cfi_offset %r14, -48
 ; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -168
-; S390X-NEXT:    .cfi_def_cfa_offset 328
-; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
 ; S390X-NEXT:    .cfi_offset %f8, -168
-; S390X-NEXT:    larl %r1, .LCPI116_0
-; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    ld %f0, 8(%r2)
+; S390X-NEXT:    ld %f8, 0(%r2)
 ; S390X-NEXT:    brasl %r14, trunc at PLT
-; S390X-NEXT:    larl %r1, .LCPI116_1
-; S390X-NEXT:    ld %f1, 0(%r1)
-; S390X-NEXT:    ldr %f8, %f0
-; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
 ; S390X-NEXT:    brasl %r14, trunc at PLT
-; S390X-NEXT:    ldr %f2, %f8
-; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_trunc_v2f64:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI116_0
-; SZ13-NEXT:    vl %v0, 0(%r1), 3
+; SZ13-NEXT:    vl %v0, 0(%r2), 3
 ; SZ13-NEXT:    vfidb %v24, %v0, 4, 5
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <2 x double>, ptr %a
   %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
-                                <2 x double> <double 1.1, double 1.9>,
+                                <2 x double> %b,
                                 metadata !"fpexcept.strict") #0
   ret <2 x double> %trunc
 }
 
-define <3 x float> @constrained_vector_trunc_v3f32() #0 {
+define <3 x float> @constrained_vector_trunc_v3f32(ptr %a) #0 {
 ; S390X-LABEL: constrained_vector_trunc_v3f32:
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
 ; S390X-NEXT:    .cfi_offset %r14, -48
 ; S390X-NEXT:    .cfi_offset %r15, -40
-; S390X-NEXT:    aghi %r15, -176
-; S390X-NEXT:    .cfi_def_cfa_offset 336
-; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
-; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
 ; S390X-NEXT:    .cfi_offset %f8, -168
 ; S390X-NEXT:    .cfi_offset %f9, -176
-; S390X-NEXT:    larl %r1, .LCPI117_0
-; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lg %r0, 0(%r2)
+; S390X-NEXT:    le %f0, 8(%r2)
+; S390X-NEXT:    risbg %r1, %r0, 0, 159, 0
+; S390X-NEXT:    ldgr %f8, %r1
+; S390X-NEXT:    sllg %r0, %r0, 32
+; S390X-NEXT:    ldgr %f9, %r0
 ; S390X-NEXT:    brasl %r14, truncf at PLT
-; S390X-NEXT:    larl %r1, .LCPI117_1
-; S390X-NEXT:    le %f1, 0(%r1)
-; S390X-NEXT:    ler %f8, %f0
-; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f9
 ; S390X-NEXT:    brasl %r14, truncf at PLT
-; S390X-NEXT:    larl %r1, .LCPI117_2
-; S390X-NEXT:    le %f1, 0(%r1)
 ; S390X-NEXT:    ler %f9, %f0
-; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f0, %f8
 ; S390X-NEXT:    brasl %r14, truncf at PLT
 ; S390X-NEXT:    ler %f2, %f9
-; S390X-NEXT:    ler %f4, %f8
-; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
-; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    ler %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_trunc_v3f32:
 ; SZ13:       # %bb.0: # %entry
-; SZ13-NEXT:    larl %r1, .LCPI117_0
-; SZ13-NEXT:    lde %f0, 0(%r1)
-; SZ13-NEXT:    larl %r1, .LCPI117_1
-; SZ13-NEXT:    lde %f2, 0(%r1)
-; SZ13-NEXT:    vgmf %v1, 2, 9
-; SZ13-NEXT:    fiebra %f0, 5, %f0, 4
+; SZ13-NEXT:    vl %v0, 0(%r2), 4
+; SZ13-NEXT:    vrepf %v1, %v0, 2
+; SZ13-NEXT:    vrepf %v2, %v0, 1
 ; SZ13-NEXT:    fiebra %f1, 5, %f1, 4
 ; SZ13-NEXT:    fiebra %f2, 5, %f2, 4
-; SZ13-NEXT:    vmrhf %v1, %v1, %v2
-; SZ13-NEXT:    vrepf %v0, %v0, 0
-; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    fiebra %f0, 5, %f0, 4
+; SZ13-NEXT:    vmrhf %v0, %v0, %v2
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
 ; SZ13-NEXT:    br %r14
 entry:
+  %b = load <3 x float>, ptr %a
   %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
-                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              <3 x float> %b,
                               metadata !"fpexcept.strict") #0
   ret <3 x float> %trunc
 }



More information about the llvm-commits mailing list