[llvm] 13c2378 - [RISCV] Expand test coverage for DAG store merging
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 21 07:47:30 PDT 2025
Author: Philip Reames
Date: 2025-03-21T07:47:18-07:00
New Revision: 13c2378599ffd887319b2d314f307ea761596ec1
URL: https://github.com/llvm/llvm-project/commit/13c2378599ffd887319b2d314f307ea761596ec1
DIFF: https://github.com/llvm/llvm-project/commit/13c2378599ffd887319b2d314f307ea761596ec1.diff
LOG: [RISCV] Expand test coverage for DAG store merging
Two sets of additions:
1) Exercise the rotation path, both for integer and float
2) Exercise the aligned and unaligned paths separately
Added:
llvm/test/CodeGen/RISCV/rvv/stores-of-loads-merging.ll
Modified:
Removed:
llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll b/llvm/test/CodeGen/RISCV/rvv/stores-of-loads-merging.ll
similarity index 69%
rename from llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll
rename to llvm/test/CodeGen/RISCV/rvv/stores-of-loads-merging.ll
index 940412c75ca2b..7699bb1ae1371 100644
--- a/llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stores-of-loads-merging.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s --check-prefixes=CHECK,V
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+b | FileCheck %s --check-prefixes=CHECK,V
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+b,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH
declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
declare void @g()
@@ -135,6 +135,48 @@ define void @i8_i16(ptr %p, ptr %q) {
ret void
}
+define void @i8_i16_rotate(ptr %p, ptr %q) { ; copies two adjacent i8 values from %p to %q with their order swapped
+; CHECK-LABEL: i8_i16_rotate:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: .cfi_offset s2, -32
+; CHECK-NEXT: lbu s1, 0(a0)
+; CHECK-NEXT: lbu s2, 1(a0)
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: call g
+; CHECK-NEXT: sb s2, 0(s0)
+; CHECK-NEXT: sb s1, 1(s0)
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+; CHECK-NEXT: .cfi_restore ra
+; CHECK-NEXT: .cfi_restore s0
+; CHECK-NEXT: .cfi_restore s1
+; CHECK-NEXT: .cfi_restore s2
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+ %p0 = getelementptr i8, ptr %p, i64 0
+ %p1 = getelementptr i8, ptr %p, i64 1
+ %x0 = load i8, ptr %p0, align 2
+ %x1 = load i8, ptr %p1
+ call void @g() ; loads precede the call, stores follow it; the CHECK lines expect two lbu and two sb (no single 16-bit access)
+ %q0 = getelementptr i8, ptr %q, i64 0
+ %q1 = getelementptr i8, ptr %q, i64 1
+ store i8 %x1, ptr %q0, align 2 ; byte loaded from offset 1 is stored at offset 0
+ store i8 %x0, ptr %q1 ; byte loaded from offset 0 is stored at offset 1
+ ret void
+}
+
; We could reorder the first call and the load here to enable
; merging, but don't currently do so.
define void @i8_i16_resched_readnone_ld(ptr %p, ptr %q) {
@@ -228,6 +270,78 @@ define void @i8_i16_resched_readnone_st(ptr %p, ptr %q) {
ret void
}
+define void @i32_i64(ptr %p, ptr %q) { ; copies two adjacent i32 values (offsets 0 and 4) from %p to %q in the same order
+; CHECK-LABEL: i32_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: ld s1, 0(a0)
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: call g
+; CHECK-NEXT: sd s1, 0(s0)
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: .cfi_restore ra
+; CHECK-NEXT: .cfi_restore s0
+; CHECK-NEXT: .cfi_restore s1
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+ %p0 = getelementptr i8, ptr %p, i64 0
+ %p1 = getelementptr i8, ptr %p, i64 4
+ %x0 = load i32, ptr %p0, align 8 ; only the first access of each pair carries an explicit alignment (8)
+ %x1 = load i32, ptr %p1
+ call void @g() ; loads precede the call, stores follow it; the CHECK lines expect one ld before and one sd after
+ %q0 = getelementptr i8, ptr %q, i64 0
+ %q1 = getelementptr i8, ptr %q, i64 4
+ store i32 %x0, ptr %q0, align 8
+ store i32 %x1, ptr %q1
+ ret void
+}
+
+define void @i32_i64_rotate(ptr %p, ptr %q) { ; copies two adjacent i32 values (offsets 0 and 4) from %p to %q with their order swapped
+; CHECK-LABEL: i32_i64_rotate:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: ld a0, 0(a0)
+; CHECK-NEXT: rori s1, a0, 32
+; CHECK-NEXT: call g
+; CHECK-NEXT: sd s1, 0(s0)
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: .cfi_restore ra
+; CHECK-NEXT: .cfi_restore s0
+; CHECK-NEXT: .cfi_restore s1
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+ %p0 = getelementptr i8, ptr %p, i64 0
+ %p1 = getelementptr i8, ptr %p, i64 4
+ %x0 = load i32, ptr %p0, align 8
+ %x1 = load i32, ptr %p1
+ call void @g() ; loads precede the call, stores follow it; the CHECK lines expect ld + rori by 32 before and one sd after
+ %q0 = getelementptr i8, ptr %q, i64 0
+ %q1 = getelementptr i8, ptr %q, i64 4
+ store i32 %x1, ptr %q0, align 8 ; word loaded from offset 4 is stored at offset 0
+ store i32 %x0, ptr %q1 ; word loaded from offset 0 is stored at offset 4
+ ret void
+}
; Merging vectors is profitable because it reduces pressure within a single
; register class.
@@ -305,8 +419,7 @@ define void @v16i8_v32i8(ptr %p, ptr %q) {
; CHECK-NEXT: vsetvli zero, s1, e8, m2, ta, ma
; CHECK-NEXT: vse8.v v8, (s0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: sh1add sp, a0, sp
; CHECK-NEXT: .cfi_def_cfa sp, 64
; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
@@ -329,9 +442,44 @@ define void @v16i8_v32i8(ptr %p, ptr %q) {
ret void
}
-; TODO: We fail to merge these, which would be profitable.
define void @two_half(ptr %p, ptr %q) { ; copies two adjacent half values (offsets 0 and 2) from %p to %q in the same order
-; V-LABEL: two_half:
+; CHECK-LABEL: two_half:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: lw s1, 0(a0)
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: call g
+; CHECK-NEXT: sw s1, 0(s0)
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: .cfi_restore ra
+; CHECK-NEXT: .cfi_restore s0
+; CHECK-NEXT: .cfi_restore s1
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+ %p0 = getelementptr i8, ptr %p, i64 0
+ %p1 = getelementptr i8, ptr %p, i64 2
+ %x0 = load half, ptr %p0, align 4 ; only the first access of each pair carries an explicit alignment (4)
+ %x1 = load half, ptr %p1
+ call void @g() ; loads precede the call, stores follow it; the shared CHECK lines expect one lw before and one sw after
+ %q0 = getelementptr i8, ptr %q, i64 0
+ %q1 = getelementptr i8, ptr %q, i64 2
+ store half %x0, ptr %q0, align 4
+ store half %x1, ptr %q1
+ ret void
+}
+
+define void @two_half_unaligned(ptr %p, ptr %q) {
+; V-LABEL: two_half_unaligned:
; V: # %bb.0:
; V-NEXT: addi sp, sp, -32
; V-NEXT: .cfi_def_cfa_offset 32
@@ -361,7 +509,7 @@ define void @two_half(ptr %p, ptr %q) {
; V-NEXT: .cfi_def_cfa_offset 0
; V-NEXT: ret
;
-; ZVFH-LABEL: two_half:
+; ZVFH-LABEL: two_half_unaligned:
; ZVFH: # %bb.0:
; ZVFH-NEXT: addi sp, sp, -32
; ZVFH-NEXT: .cfi_def_cfa_offset 32
@@ -404,6 +552,7 @@ define void @two_half(ptr %p, ptr %q) {
ret void
}
+
; TODO: This one is currently a vector, which is unprofitable; we should use
; i64 instead. NOTE(review): two_float now checks ld/sd; move to *_unaligned?
define void @two_float(ptr %p, ptr %q) {
@@ -413,6 +562,42 @@ define void @two_float(ptr %p, ptr %q) {
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: ld s1, 0(a0)
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: call g
+; CHECK-NEXT: sd s1, 0(s0)
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: .cfi_restore ra
+; CHECK-NEXT: .cfi_restore s0
+; CHECK-NEXT: .cfi_restore s1
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+ %p0 = getelementptr i8, ptr %p, i64 0
+ %p1 = getelementptr i8, ptr %p, i64 4
+ %x0 = load float, ptr %p0, align 8
+ %x1 = load float, ptr %p1
+ call void @g()
+ %q0 = getelementptr i8, ptr %q, i64 0
+ %q1 = getelementptr i8, ptr %q, i64 4
+ store float %x0, ptr %q0, align 8
+ store float %x1, ptr %q1
+ ret void
+}
+
+define void @two_float_unaligned(ptr %p, ptr %q) {
+; CHECK-LABEL: two_float_unaligned:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
; CHECK-NEXT: csrr a2, vlenb
@@ -450,6 +635,43 @@ define void @two_float(ptr %p, ptr %q) {
ret void
}
+define void @two_float_rotate(ptr %p, ptr %q) { ; copies two adjacent float values (offsets 0 and 4) from %p to %q with their order swapped
+; CHECK-LABEL: two_float_rotate:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset ra, -8
+; CHECK-NEXT: .cfi_offset s0, -16
+; CHECK-NEXT: .cfi_offset s1, -24
+; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: ld a0, 0(a0)
+; CHECK-NEXT: rori s1, a0, 32
+; CHECK-NEXT: call g
+; CHECK-NEXT: sd s1, 0(s0)
+; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: .cfi_restore ra
+; CHECK-NEXT: .cfi_restore s0
+; CHECK-NEXT: .cfi_restore s1
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+ %p0 = getelementptr i8, ptr %p, i64 0
+ %p1 = getelementptr i8, ptr %p, i64 4
+ %x0 = load float, ptr %p0, align 8
+ %x1 = load float, ptr %p1
+ call void @g() ; loads precede the call, stores follow it; the CHECK lines expect scalar ld + rori by 32 before and one sd after
+ %q0 = getelementptr i8, ptr %q, i64 0
+ %q1 = getelementptr i8, ptr %q, i64 4
+ store float %x1, ptr %q0, align 8 ; float loaded from offset 4 is stored at offset 0
+ store float %x0, ptr %q1 ; float loaded from offset 0 is stored at offset 4
+ ret void
+}
+
define void @two_double(ptr %p, ptr %q) {
; CHECK-LABEL: two_double:
; CHECK: # %bb.0:
More information about the llvm-commits
mailing list