[llvm] [RISCV] Fold vmv.s.x into load from stack (PR #109774)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 24 11:35:07 PDT 2024
================
@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV32 %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV64 %s
+
+define i64 @i64(<vscale x 1 x i64> %v, i1 %c) {
+; RV32-LABEL: i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: #APP
+; RV32-NEXT: #NO_APP
+; RV32-NEXT: beqz a0, .LBB0_2
+; RV32-NEXT: # %bb.1: # %truebb
+; RV32-NEXT: li a0, 32
+; RV32-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vsrl.vx v8, v9, a0
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: vmv.x.s a0, v9
+; RV32-NEXT: j .LBB0_3
+; RV32-NEXT: .LBB0_2: # %falsebb
+; RV32-NEXT: li a1, 0
+; RV32-NEXT: .LBB0_3: # %falsebb
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 1
+; RV32-NEXT: add sp, sp, a2
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: #APP
+; RV64-NEXT: #NO_APP
+; RV64-NEXT: beqz a0, .LBB0_2
+; RV64-NEXT: # %bb.1: # %truebb
+; RV64-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: .LBB0_2: # %falsebb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add sp, sp, a1
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ br i1 %c, label %truebb, label %falsebb
+truebb:
+ %x = extractelement <vscale x 1 x i64> %v, i32 0
+ ret i64 %x
+falsebb:
+ ret i64 0
+}
+
+define i32 @i32(<vscale x 2 x i32> %v, i1 %c) {
+; CHECK-LABEL: i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: beqz a0, .LBB1_2
+; CHECK-NEXT: # %bb.1: # %truebb
+; CHECK-NEXT: lw a0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: .LBB1_2: # %falsebb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add sp, sp, a1
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ br i1 %c, label %truebb, label %falsebb
+truebb:
+ %x = extractelement <vscale x 2 x i32> %v, i32 0
+ ret i32 %x
+falsebb:
+ ret i32 0
+}
+
+define i16 @i16(<vscale x 4 x i16> %v, i1 %c) {
+; CHECK-LABEL: i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
----------------
lukel97 wrote:
Yeah that's why these tests have the branch, to prevent the scheduler from moving the extractelement above the asm. I couldn't use the store volatile trick that I used in remat.ll since it interferes with the reload.
I have no idea why the stack size is 2 * VLENB though.
https://github.com/llvm/llvm-project/pull/109774
More information about the llvm-commits
mailing list