[llvm] [SystemZ] Consider VST/VL as SimpleBDXStore/Load (PR #135623)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 14 06:17:12 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-systemz
Author: Dominik Steenken (dominik-steenken)
<details>
<summary>Changes</summary>
Previously, `vst` and `vl` were not considered "simple" BDX stores and loads, which caused, among other things, some opportunities for `mvc` optimization to be missed.
This PR marks both instructions accordingly (`SimpleBDXLoad = 1` on `VL`, `SimpleBDXStore = 1` on `VST`) and updates the affected test to account for the additional `mvc` instructions being emitted.
The change has been observed to have a neutral or slightly beneficial effect on performance.
---
Full diff: https://github.com/llvm/llvm-project/pull/135623.diff
2 Files Affected:
- (modified) llvm/lib/Target/SystemZ/SystemZInstrVector.td (+4-2)
- (modified) llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll (+27-41)
``````````diff
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index e1fe7edc4cb08..d8c48239ac633 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -105,7 +105,8 @@ let Predicates = [FeatureVector] in {
let Predicates = [FeatureVector] in {
// Load.
- defm VL : UnaryVRXAlign<"vl", 0xE706>;
+ let SimpleBDXLoad = 1 in
+ defm VL : UnaryVRXAlign<"vl", 0xE706>;
// Load to block boundary. The number of loaded bytes is only known
// at run time. The instruction is really polymorphic, but v128b matches
@@ -213,7 +214,8 @@ defm : ReplicatePeephole<VLREPG, v2f64, z_load, f64>;
let Predicates = [FeatureVector] in {
// Store.
- defm VST : StoreVRXAlign<"vst", 0xE70E>;
+ let SimpleBDXStore = 1 in
+ defm VST : StoreVRXAlign<"vst", 0xE70E>;
// Store with length. The number of stored bytes is only known at run time.
def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>;
diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
index e0818ea3da294..96d91953b9f83 100644
--- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -440,10 +440,9 @@ define void @constrained_vector_frem_v3f64(ptr %a) #0 {
; SZ13-NEXT: ld %f0, 0(%r1)
; SZ13-NEXT: ldr %f2, %f8
; SZ13-NEXT: brasl %r14, fmod@PLT
-; SZ13-NEXT: std %f0, 16(%r13)
-; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
; SZ13-NEXT: ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT: vst %v0, 0(%r13), 4
+; SZ13-NEXT: std %f0, 16(%r13)
; SZ13-NEXT: lmg %r13, %r15, 304(%r15)
; SZ13-NEXT: br %r14
entry:
@@ -1473,11 +1472,10 @@ define void @constrained_vector_pow_v3f64(ptr %a) #0 {
; SZ13-NEXT: ldr %f0, %f8
; SZ13-NEXT: ldr %f2, %f9
; SZ13-NEXT: brasl %r14, pow@PLT
-; SZ13-NEXT: std %f0, 16(%r13)
-; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
; SZ13-NEXT: ld %f8, 200(%r15) # 8-byte Reload
; SZ13-NEXT: ld %f9, 192(%r15) # 8-byte Reload
-; SZ13-NEXT: vst %v0, 0(%r13), 4
+; SZ13-NEXT: std %f0, 16(%r13)
; SZ13-NEXT: lmg %r13, %r15, 312(%r15)
; SZ13-NEXT: br %r14
entry:
@@ -1829,9 +1827,8 @@ define void @constrained_vector_powi_v3f64(ptr %a) #0 {
; SZ13-NEXT: ld %f0, 0(%r1)
; SZ13-NEXT: lghi %r2, 3
; SZ13-NEXT: brasl %r14, __powidf2@PLT
+; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
; SZ13-NEXT: std %f0, 16(%r13)
-; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload
-; SZ13-NEXT: vst %v0, 0(%r13), 4
; SZ13-NEXT: lmg %r13, %r15, 280(%r15)
; SZ13-NEXT: br %r14
entry:
@@ -2155,10 +2152,9 @@ define void @constrained_vector_sin_v3f64(ptr %a) #0 {
; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill
; SZ13-NEXT: ldr %f0, %f8
; SZ13-NEXT: brasl %r14, sin@PLT
-; SZ13-NEXT: std %f0, 16(%r13)
-; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
; SZ13-NEXT: ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT: vst %v0, 0(%r13), 4
+; SZ13-NEXT: std %f0, 16(%r13)
; SZ13-NEXT: lmg %r13, %r15, 304(%r15)
; SZ13-NEXT: br %r14
entry:
@@ -2472,10 +2468,9 @@ define void @constrained_vector_cos_v3f64(ptr %a) #0 {
; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill
; SZ13-NEXT: ldr %f0, %f8
; SZ13-NEXT: brasl %r14, cos@PLT
-; SZ13-NEXT: std %f0, 16(%r13)
-; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
; SZ13-NEXT: ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT: vst %v0, 0(%r13), 4
+; SZ13-NEXT: std %f0, 16(%r13)
; SZ13-NEXT: lmg %r13, %r15, 304(%r15)
; SZ13-NEXT: br %r14
entry:
@@ -2789,10 +2784,9 @@ define void @constrained_vector_exp_v3f64(ptr %a) #0 {
; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill
; SZ13-NEXT: ldr %f0, %f8
; SZ13-NEXT: brasl %r14, exp@PLT
-; SZ13-NEXT: std %f0, 16(%r13)
-; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
; SZ13-NEXT: ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT: vst %v0, 0(%r13), 4
+; SZ13-NEXT: std %f0, 16(%r13)
; SZ13-NEXT: lmg %r13, %r15, 304(%r15)
; SZ13-NEXT: br %r14
entry:
@@ -3106,10 +3100,9 @@ define void @constrained_vector_exp2_v3f64(ptr %a) #0 {
; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill
; SZ13-NEXT: ldr %f0, %f8
; SZ13-NEXT: brasl %r14, exp2@PLT
-; SZ13-NEXT: std %f0, 16(%r13)
-; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
; SZ13-NEXT: ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT: vst %v0, 0(%r13), 4
+; SZ13-NEXT: std %f0, 16(%r13)
; SZ13-NEXT: lmg %r13, %r15, 304(%r15)
; SZ13-NEXT: br %r14
entry:
@@ -3423,10 +3416,9 @@ define void @constrained_vector_log_v3f64(ptr %a) #0 {
; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill
; SZ13-NEXT: ldr %f0, %f8
; SZ13-NEXT: brasl %r14, log@PLT
-; SZ13-NEXT: std %f0, 16(%r13)
-; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
; SZ13-NEXT: ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT: vst %v0, 0(%r13), 4
+; SZ13-NEXT: std %f0, 16(%r13)
; SZ13-NEXT: lmg %r13, %r15, 304(%r15)
; SZ13-NEXT: br %r14
entry:
@@ -3740,10 +3732,9 @@ define void @constrained_vector_log10_v3f64(ptr %a) #0 {
; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill
; SZ13-NEXT: ldr %f0, %f8
; SZ13-NEXT: brasl %r14, log10@PLT
-; SZ13-NEXT: std %f0, 16(%r13)
-; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
; SZ13-NEXT: ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT: vst %v0, 0(%r13), 4
+; SZ13-NEXT: std %f0, 16(%r13)
; SZ13-NEXT: lmg %r13, %r15, 304(%r15)
; SZ13-NEXT: br %r14
entry:
@@ -4057,10 +4048,9 @@ define void @constrained_vector_log2_v3f64(ptr %a) #0 {
; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill
; SZ13-NEXT: ldr %f0, %f8
; SZ13-NEXT: brasl %r14, log2@PLT
-; SZ13-NEXT: std %f0, 16(%r13)
-; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
; SZ13-NEXT: ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT: vst %v0, 0(%r13), 4
+; SZ13-NEXT: std %f0, 16(%r13)
; SZ13-NEXT: lmg %r13, %r15, 304(%r15)
; SZ13-NEXT: br %r14
entry:
@@ -4788,10 +4778,9 @@ define void @constrained_vector_log10_maxnum_v3f64(ptr %a) #0 {
; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill
; SZ13-NEXT: ldr %f0, %f8
; SZ13-NEXT: brasl %r14, fmax@PLT
-; SZ13-NEXT: std %f0, 16(%r13)
-; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
; SZ13-NEXT: ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT: vst %v0, 0(%r13), 4
+; SZ13-NEXT: std %f0, 16(%r13)
; SZ13-NEXT: lmg %r13, %r15, 304(%r15)
; SZ13-NEXT: br %r14
entry:
@@ -5165,11 +5154,10 @@ define void @constrained_vector_minnum_v3f64(ptr %a) #0 {
; SZ13-NEXT: ldr %f0, %f8
; SZ13-NEXT: ldr %f2, %f9
; SZ13-NEXT: brasl %r14, fmin@PLT
-; SZ13-NEXT: std %f0, 16(%r13)
-; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
; SZ13-NEXT: ld %f8, 200(%r15) # 8-byte Reload
; SZ13-NEXT: ld %f9, 192(%r15) # 8-byte Reload
-; SZ13-NEXT: vst %v0, 0(%r13), 4
+; SZ13-NEXT: std %f0, 16(%r13)
; SZ13-NEXT: lmg %r13, %r15, 312(%r15)
; SZ13-NEXT: br %r14
entry:
@@ -6612,10 +6600,9 @@ define void @constrained_vector_tan_v3f64(ptr %a) #0 {
; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Folded Spill
; SZ13-NEXT: ldr %f0, %f8
; SZ13-NEXT: brasl %r14, tan@PLT
-; SZ13-NEXT: std %f0, 16(%r13)
-; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
; SZ13-NEXT: ld %f8, 192(%r15) # 8-byte Reload
-; SZ13-NEXT: vst %v0, 0(%r13), 4
+; SZ13-NEXT: std %f0, 16(%r13)
; SZ13-NEXT: lmg %r13, %r15, 304(%r15)
; SZ13-NEXT: br %r14
entry:
@@ -6977,11 +6964,10 @@ define void @constrained_vector_atan2_v3f64(ptr %a, ptr %b) #0 {
; SZ13-NEXT: ldr %f0, %f8
; SZ13-NEXT: ldr %f2, %f9
; SZ13-NEXT: brasl %r14, atan2@PLT
-; SZ13-NEXT: std %f0, 16(%r13)
-; SZ13-NEXT: vl %v0, 160(%r15), 3 # 16-byte Folded Reload
+; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload
; SZ13-NEXT: ld %f8, 216(%r15) # 8-byte Reload
; SZ13-NEXT: ld %f9, 208(%r15) # 8-byte Reload
-; SZ13-NEXT: vst %v0, 0(%r13), 4
+; SZ13-NEXT: std %f0, 16(%r13)
; SZ13-NEXT: lmg %r13, %r15, 328(%r15)
; SZ13-NEXT: br %r14
entry:
``````````
</details>
https://github.com/llvm/llvm-project/pull/135623
More information about the llvm-commits mailing list