[llvm] 4ab011a - [RISCV] Precommit store merge tests for pr130430
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 18 12:58:12 PDT 2025
Author: Philip Reames
Date: 2025-03-18T12:57:56-07:00
New Revision: 4ab011a9502218fc08a609348054307b4a651d83
URL: https://github.com/llvm/llvm-project/commit/4ab011a9502218fc08a609348054307b4a651d83
DIFF: https://github.com/llvm/llvm-project/commit/4ab011a9502218fc08a609348054307b4a651d83.diff
LOG: [RISCV] Precommit store merge tests for pr130430
Added:
Modified:
llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll
index b2be401b4676f..9a1b85defeaaa 100644
--- a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll
+++ b/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll
@@ -4,6 +4,8 @@
declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
declare void @g()
+; TODO: Merging scalars into vectors is unprofitable because we have no
+; callee-saved vector registers, which creates additional spills around the call.
define void @f(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) {
; CHECK-LABEL: f:
; CHECK: # %bb.0:
@@ -93,3 +95,148 @@ define void @f1(ptr %m, ptr %n, ptr %p, ptr %q, ptr %r, ptr %s, double %t) {
ret void
}
+
+; Merging scalars is profitable: it reduces pressure within a single register
+; class. Here two adjacent i8 load/store pairs become one lh/sh around the call.
+define void @i8_i16(ptr %p, ptr %q) {
+; CHECK-LABEL: i8_i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: lh s1, 0(a0)
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: call g
+; CHECK-NEXT: sh s1, 0(s0)
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: .cfi_restore ra
+; CHECK-NEXT: .cfi_restore s0
+; CHECK-NEXT: .cfi_restore s1
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+ %p0 = getelementptr i8, ptr %p, i64 0
+ %p1 = getelementptr i8, ptr %p, i64 1
+ %x0 = load i8, ptr %p0, align 2
+ %x1 = load i8, ptr %p1
+ call void @g()
+ %q0 = getelementptr i8, ptr %q, i64 0
+ %q1 = getelementptr i8, ptr %q, i64 1
+ store i8 %x0, ptr %q0, align 2
+ store i8 %x1, ptr %q1
+ ret void
+}
+
+; Merging vectors is profitable: it reduces pressure within a single register
+; class. The two <2 x i8> accesses become one VL=4 access with a single spill.
+define void @v2i8_v4i8(ptr %p, ptr %q) {
+; CHECK-LABEL: v2i8_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 1 * vlenb
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: call g
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vse8.v v8, (s0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: .cfi_def_cfa sp, 32
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: .cfi_restore ra
+; CHECK-NEXT: .cfi_restore s0
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+ %p0 = getelementptr i8, ptr %p, i64 0
+ %p1 = getelementptr i8, ptr %p, i64 2
+ %x0 = load <2 x i8>, ptr %p0, align 2
+ %x1 = load <2 x i8>, ptr %p1
+ call void @g()
+ %q0 = getelementptr i8, ptr %q, i64 0
+ %q1 = getelementptr i8, ptr %q, i64 2
+ store <2 x i8> %x0, ptr %q0, align 2
+ store <2 x i8> %x1, ptr %q1
+ ret void
+}
+
+; Merging two 16 x i8 into one 32 x i8 (on zvl128b) will require the same number
+; of registers to be spilled, but it can be done with fewer instructions.
+; NOTE(review): %q1 is at %q+2 but %p1 at %p+16, so the stores overlap; 16 meant?
+define void @v16i8_v32i8(ptr %p, ptr %q) {
+; CHECK-LABEL: v16i8_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
+; CHECK-NEXT: addi a2, a0, 16
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vle8.v v8, (a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: call g
+; CHECK-NEXT: addi a0, s0, 2
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vse8.v v8, (s0)
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: .cfi_def_cfa sp, 32
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: .cfi_restore ra
+; CHECK-NEXT: .cfi_restore s0
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+ %p0 = getelementptr i8, ptr %p, i64 0
+ %p1 = getelementptr i8, ptr %p, i64 16
+ %x0 = load <16 x i8>, ptr %p0, align 2
+ %x1 = load <16 x i8>, ptr %p1
+ call void @g()
+ %q0 = getelementptr i8, ptr %q, i64 0
+ %q1 = getelementptr i8, ptr %q, i64 2
+ store <16 x i8> %x0, ptr %q0, align 16
+ store <16 x i8> %x1, ptr %q1
+ ret void
+}
More information about the llvm-commits
mailing list