[llvm] f6dacda - [RISCV] Fold vfmv.f.s into load from stack (#110129)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 26 18:35:18 PDT 2024
Author: Luke Lau
Date: 2024-09-27T09:35:14+08:00
New Revision: f6dacda94907c83942760dd49578a31fc5f990bf
URL: https://github.com/llvm/llvm-project/commit/f6dacda94907c83942760dd49578a31fc5f990bf
DIFF: https://github.com/llvm/llvm-project/commit/f6dacda94907c83942760dd49578a31fc5f990bf.diff
LOG: [RISCV] Fold vfmv.f.s into load from stack (#110129)
This is the f64/f32 version of #109774.
I've left out f16 and bf16 for now because there's a separate issue
where we can't select extract_vector_elt when f16/bf16 is a legal type,
see #110126.
Added:
Modified:
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 8dafd824963c09..10b4e4870aebe3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -784,6 +784,24 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
}
break;
}
+ if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VFMV_F_S) {
+ unsigned Log2SEW =
+ MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
+ switch (Log2SEW) {
+ case 4:
+ // TODO: Support f16/bf16
+ return nullptr;
+ case 5:
+ LoadOpc = RISCV::FLW;
+ break;
+ case 6:
+ LoadOpc = RISCV::FLD;
+ break;
+ default:
+ llvm_unreachable("Unexpected SEW");
+ }
+ break;
+ }
return nullptr;
case RISCV::SEXT_H:
LoadOpc = RISCV::LH;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
index 1395dc914bb402..3c184c112e77a7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -2261,10 +2261,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
; CHECK-V-NEXT: call __fixdfti
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vfmv.f.s fa0, v8
+; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: call __fixdfti
; CHECK-V-NEXT: li a2, -1
; CHECK-V-NEXT: srli a3, a2, 1
@@ -2394,10 +2391,7 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
; CHECK-V-NEXT: call __fixunsdfti
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vfmv.f.s fa0, v8
+; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: call __fixunsdfti
; CHECK-V-NEXT: snez a1, a1
; CHECK-V-NEXT: snez a2, s1
@@ -2506,10 +2500,7 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
; CHECK-V-NEXT: call __fixdfti
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vfmv.f.s fa0, v8
+; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: call __fixdfti
; CHECK-V-NEXT: mv a2, s1
; CHECK-V-NEXT: blez s1, .LBB20_2
@@ -2668,10 +2659,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
; CHECK-V-NEXT: call __fixsfti
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-V-NEXT: vfmv.f.s fa0, v8
+; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: call __fixsfti
; CHECK-V-NEXT: li a2, -1
; CHECK-V-NEXT: srli a3, a2, 1
@@ -2801,10 +2789,7 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
; CHECK-V-NEXT: call __fixunssfti
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-V-NEXT: vfmv.f.s fa0, v8
+; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: call __fixunssfti
; CHECK-V-NEXT: snez a1, a1
; CHECK-V-NEXT: snez a2, s1
@@ -2913,10 +2898,7 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
; CHECK-V-NEXT: call __fixsfti
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-V-NEXT: vfmv.f.s fa0, v8
+; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: call __fixsfti
; CHECK-V-NEXT: mv a2, s1
; CHECK-V-NEXT: blez s1, .LBB23_2
@@ -5597,10 +5579,7 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) {
; CHECK-V-NEXT: call __fixdfti
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vfmv.f.s fa0, v8
+; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: call __fixdfti
; CHECK-V-NEXT: li a2, -1
; CHECK-V-NEXT: srli a3, a2, 1
@@ -5831,10 +5810,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
; CHECK-V-NEXT: call __fixdfti
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; CHECK-V-NEXT: vfmv.f.s fa0, v8
+; CHECK-V-NEXT: fld fa0, 32(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: call __fixdfti
; CHECK-V-NEXT: mv a2, a1
; CHECK-V-NEXT: blez a1, .LBB47_2
@@ -5983,10 +5959,7 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) {
; CHECK-V-NEXT: call __fixsfti
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-V-NEXT: vfmv.f.s fa0, v8
+; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: call __fixsfti
; CHECK-V-NEXT: li a2, -1
; CHECK-V-NEXT: srli a3, a2, 1
@@ -6217,10 +6190,7 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
; CHECK-V-NEXT: call __fixsfti
; CHECK-V-NEXT: mv s0, a0
; CHECK-V-NEXT: mv s1, a1
-; CHECK-V-NEXT: addi a0, sp, 32
-; CHECK-V-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-V-NEXT: vfmv.f.s fa0, v8
+; CHECK-V-NEXT: flw fa0, 32(sp) # 8-byte Folded Reload
; CHECK-V-NEXT: call __fixsfti
; CHECK-V-NEXT: mv a2, a1
; CHECK-V-NEXT: blez a1, .LBB50_2
diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
index 4771d7fe6ec92b..f966835622a9f3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
@@ -160,3 +160,100 @@ truebb:
falsebb:
ret i8 0
}
+
+define double @f64(<vscale x 1 x double> %v, i1 %c) {
+; RV32-LABEL: f64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: #APP
+; RV32-NEXT: #NO_APP
+; RV32-NEXT: beqz a0, .LBB4_2
+; RV32-NEXT: # %bb.1: # %truebb
+; RV32-NEXT: fld fa0, 16(sp) # 8-byte Folded Reload
+; RV32-NEXT: j .LBB4_3
+; RV32-NEXT: .LBB4_2: # %falsebb
+; RV32-NEXT: fcvt.d.w fa0, zero
+; RV32-NEXT: .LBB4_3: # %falsebb
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: f64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: #APP
+; RV64-NEXT: #NO_APP
+; RV64-NEXT: beqz a0, .LBB4_2
+; RV64-NEXT: # %bb.1: # %truebb
+; RV64-NEXT: fld fa0, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT: j .LBB4_3
+; RV64-NEXT: .LBB4_2: # %falsebb
+; RV64-NEXT: fmv.d.x fa0, zero
+; RV64-NEXT: .LBB4_3: # %falsebb
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ br i1 %c, label %truebb, label %falsebb
+truebb:
+ %x = extractelement <vscale x 1 x double> %v, i32 0
+ ret double %x
+falsebb:
+ ret double 0.0
+}
+
+define float @f32(<vscale x 2 x float> %v, i1 %c) {
+; CHECK-LABEL: f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: beqz a0, .LBB5_2
+; CHECK-NEXT: # %bb.1: # %truebb
+; CHECK-NEXT: flw fa0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT: j .LBB5_3
+; CHECK-NEXT: .LBB5_2: # %falsebb
+; CHECK-NEXT: fmv.w.x fa0, zero
+; CHECK-NEXT: .LBB5_3: # %falsebb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ br i1 %c, label %truebb, label %falsebb
+truebb:
+ %x = extractelement <vscale x 2 x float> %v, i32 0
+ ret float %x
+falsebb:
+ ret float 0.0
+}
+
More information about the llvm-commits
mailing list