[llvm] [SystemZ] Consider VST/VL as SimpleBDXStore/Load (PR #135623)

via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 14 06:17:12 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-systemz

Author: Dominik Steenken (dominik-steenken)

<details>
<summary>Changes</summary>

Previously `vst` and `vl` were not considered "simple" BDX stores and loads, leading to, among other things, some opportunities for `mvc` optimization to be missed.

This PR addresses this and updates some tests to account for additional `mvc` instructions being emitted.

This is observed to have a neutral or slightly beneficial effect performance-wise.

---
Full diff: https://github.com/llvm/llvm-project/pull/135623.diff


2 Files Affected:

- (modified) llvm/lib/Target/SystemZ/SystemZInstrVector.td (+4-2) 
- (modified) llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll (+27-41) 


``````````diff
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index e1fe7edc4cb08..d8c48239ac633 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -105,7 +105,8 @@ let Predicates = [FeatureVector] in {
 
 let Predicates = [FeatureVector] in {
   // Load.
-  defm VL : UnaryVRXAlign<"vl", 0xE706>;
+  let SimpleBDXLoad = 1 in
+      defm VL : UnaryVRXAlign<"vl", 0xE706>;
 
   // Load to block boundary.  The number of loaded bytes is only known
   // at run time.  The instruction is really polymorphic, but v128b matches
@@ -213,7 +214,8 @@ defm : ReplicatePeephole<VLREPG, v2f64, z_load, f64>;
 
 let Predicates = [FeatureVector] in {
   // Store.
-  defm VST : StoreVRXAlign<"vst", 0xE70E>;
+  let SimpleBDXStore = 1 in
+      defm VST : StoreVRXAlign<"vst", 0xE70E>;
 
   // Store with length.  The number of stored bytes is only known at run time.
   def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>;
diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
index e0818ea3da294..96d91953b9f83 100644
--- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -440,10 +440,9 @@ define void @constrained_vector_frem_v3f64(ptr %a) #0 {
 ; SZ13-NEXT:    ld %f0, 0(%r1)
 ; SZ13-NEXT:    ldr %f2, %f8
 ; SZ13-NEXT:    brasl %r14, fmod@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT:    mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
 ; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT:    vst %v0, 0(%r13), 4
+; SZ13-NEXT:    std %f0, 16(%r13)
 ; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
 ; SZ13-NEXT:    br %r14
 entry:
@@ -1473,11 +1472,10 @@ define void @constrained_vector_pow_v3f64(ptr %a) #0 {
 ; SZ13-NEXT:    ldr %f0, %f8
 ; SZ13-NEXT:    ldr %f2, %f9
 ; SZ13-NEXT:    brasl %r14, pow@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT:    mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
 ; SZ13-NEXT:    ld %f8, 200(%r15) # 8-byte Reload
 ; SZ13-NEXT:    ld %f9, 192(%r15) # 8-byte Reload
-; SZ13-NEXT:    vst %v0, 0(%r13), 4
+; SZ13-NEXT:    std %f0, 16(%r13)
 ; SZ13-NEXT:    lmg %r13, %r15, 312(%r15)
 ; SZ13-NEXT:    br %r14
 entry:
@@ -1829,9 +1827,8 @@ define void @constrained_vector_powi_v3f64(ptr %a) #0 {
 ; SZ13-NEXT:    ld %f0, 0(%r1)
 ; SZ13-NEXT:    lghi %r2, 3
 ; SZ13-NEXT:    brasl %r14, __powidf2@PLT
+; SZ13-NEXT:    mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
 ; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 160(%r15), 3 # 16-byte Folded Reload
-; SZ13-NEXT:    vst %v0, 0(%r13), 4
 ; SZ13-NEXT:    lmg %r13, %r15, 280(%r15)
 ; SZ13-NEXT:    br %r14
 entry:
@@ -2155,10 +2152,9 @@ define void @constrained_vector_sin_v3f64(ptr %a) #0 {
 ; SZ13-NEXT:    vst %v0, 160(%r15), 3 # 16-byte Folded Spill
 ; SZ13-NEXT:    ldr %f0, %f8
 ; SZ13-NEXT:    brasl %r14, sin@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT:    mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
 ; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT:    vst %v0, 0(%r13), 4
+; SZ13-NEXT:    std %f0, 16(%r13)
 ; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
 ; SZ13-NEXT:    br %r14
 entry:
@@ -2472,10 +2468,9 @@ define void @constrained_vector_cos_v3f64(ptr %a) #0 {
 ; SZ13-NEXT:    vst %v0, 160(%r15), 3 # 16-byte Folded Spill
 ; SZ13-NEXT:    ldr %f0, %f8
 ; SZ13-NEXT:    brasl %r14, cos@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT:    mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
 ; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT:    vst %v0, 0(%r13), 4
+; SZ13-NEXT:    std %f0, 16(%r13)
 ; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
 ; SZ13-NEXT:    br %r14
 entry:
@@ -2789,10 +2784,9 @@ define void @constrained_vector_exp_v3f64(ptr %a) #0 {
 ; SZ13-NEXT:    vst %v0, 160(%r15), 3 # 16-byte Folded Spill
 ; SZ13-NEXT:    ldr %f0, %f8
 ; SZ13-NEXT:    brasl %r14, exp@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT:    mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
 ; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT:    vst %v0, 0(%r13), 4
+; SZ13-NEXT:    std %f0, 16(%r13)
 ; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
 ; SZ13-NEXT:    br %r14
 entry:
@@ -3106,10 +3100,9 @@ define void @constrained_vector_exp2_v3f64(ptr %a) #0 {
 ; SZ13-NEXT:    vst %v0, 160(%r15), 3 # 16-byte Folded Spill
 ; SZ13-NEXT:    ldr %f0, %f8
 ; SZ13-NEXT:    brasl %r14, exp2@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT:    mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
 ; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT:    vst %v0, 0(%r13), 4
+; SZ13-NEXT:    std %f0, 16(%r13)
 ; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
 ; SZ13-NEXT:    br %r14
 entry:
@@ -3423,10 +3416,9 @@ define void @constrained_vector_log_v3f64(ptr %a) #0 {
 ; SZ13-NEXT:    vst %v0, 160(%r15), 3 # 16-byte Folded Spill
 ; SZ13-NEXT:    ldr %f0, %f8
 ; SZ13-NEXT:    brasl %r14, log@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT:    mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
 ; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT:    vst %v0, 0(%r13), 4
+; SZ13-NEXT:    std %f0, 16(%r13)
 ; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
 ; SZ13-NEXT:    br %r14
 entry:
@@ -3740,10 +3732,9 @@ define void @constrained_vector_log10_v3f64(ptr %a) #0 {
 ; SZ13-NEXT:    vst %v0, 160(%r15), 3 # 16-byte Folded Spill
 ; SZ13-NEXT:    ldr %f0, %f8
 ; SZ13-NEXT:    brasl %r14, log10@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT:    mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
 ; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT:    vst %v0, 0(%r13), 4
+; SZ13-NEXT:    std %f0, 16(%r13)
 ; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
 ; SZ13-NEXT:    br %r14
 entry:
@@ -4057,10 +4048,9 @@ define void @constrained_vector_log2_v3f64(ptr %a) #0 {
 ; SZ13-NEXT:    vst %v0, 160(%r15), 3 # 16-byte Folded Spill
 ; SZ13-NEXT:    ldr %f0, %f8
 ; SZ13-NEXT:    brasl %r14, log2@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT:    mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
 ; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT:    vst %v0, 0(%r13), 4
+; SZ13-NEXT:    std %f0, 16(%r13)
 ; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
 ; SZ13-NEXT:    br %r14
 entry:
@@ -4788,10 +4778,9 @@ define void @constrained_vector_log10_maxnum_v3f64(ptr %a) #0 {
 ; SZ13-NEXT:    vst %v0, 160(%r15), 3 # 16-byte Folded Spill
 ; SZ13-NEXT:    ldr %f0, %f8
 ; SZ13-NEXT:    brasl %r14, fmax@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT:    mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
 ; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT:    vst %v0, 0(%r13), 4
+; SZ13-NEXT:    std %f0, 16(%r13)
 ; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
 ; SZ13-NEXT:    br %r14
 entry:
@@ -5165,11 +5154,10 @@ define void @constrained_vector_minnum_v3f64(ptr %a) #0 {
 ; SZ13-NEXT:    ldr %f0, %f8
 ; SZ13-NEXT:    ldr %f2, %f9
 ; SZ13-NEXT:    brasl %r14, fmin@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT:    mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
 ; SZ13-NEXT:    ld %f8, 200(%r15) # 8-byte Reload
 ; SZ13-NEXT:    ld %f9, 192(%r15) # 8-byte Reload
-; SZ13-NEXT:    vst %v0, 0(%r13), 4
+; SZ13-NEXT:    std %f0, 16(%r13)
 ; SZ13-NEXT:    lmg %r13, %r15, 312(%r15)
 ; SZ13-NEXT:    br %r14
 entry:
@@ -6612,10 +6600,9 @@ define void @constrained_vector_tan_v3f64(ptr %a) #0 {
 ; SZ13-NEXT:    vst %v0, 160(%r15), 3 # 16-byte Folded Spill
 ; SZ13-NEXT:    ldr %f0, %f8
 ; SZ13-NEXT:    brasl %r14, tan@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT:    mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
 ; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT:    vst %v0, 0(%r13), 4
+; SZ13-NEXT:    std %f0, 16(%r13)
 ; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
 ; SZ13-NEXT:    br %r14
 entry:
@@ -6977,11 +6964,10 @@ define void @constrained_vector_atan2_v3f64(ptr %a, ptr %b) #0 {
 ; SZ13-NEXT:    ldr %f0, %f8
 ; SZ13-NEXT:    ldr %f2, %f9
 ; SZ13-NEXT:    brasl %r14, atan2@PLT
-; SZ13-NEXT:    std %f0, 16(%r13)
-; SZ13-NEXT:    vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT:    mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
 ; SZ13-NEXT:    ld %f8, 216(%r15) # 8-byte Reload
 ; SZ13-NEXT:    ld %f9, 208(%r15) # 8-byte Reload
-; SZ13-NEXT:    vst %v0, 0(%r13), 4
+; SZ13-NEXT:    std %f0, 16(%r13)
 ; SZ13-NEXT:    lmg %r13, %r15, 328(%r15)
 ; SZ13-NEXT:    br %r14
 entry:

``````````

</details>


https://github.com/llvm/llvm-project/pull/135623


More information about the llvm-commits mailing list