[llvm] a81902f - [RISCV] Fold vfmv.f.s of f16 into load from stack (#110214)
Author: Luke Lau
Date: 2024-10-01T14:09:56+08:00
New Revision: a81902ffc9b2074729c34160a5e111f139e21ccf
URL: https://github.com/llvm/llvm-project/commit/a81902ffc9b2074729c34160a5e111f139e21ccf
DIFF: https://github.com/llvm/llvm-project/commit/a81902ffc9b2074729c34160a5e111f139e21ccf.diff
LOG: [RISCV] Fold vfmv.f.s of f16 into load from stack (#110214)
After #110144, we can finish off #110129 and fold an f16 vfmv.f.s into a
flh.
For f16, vfmv.f.s is only available with zvfh, which in turn requires
zfhmin, so flh is guaranteed to be available.
bf16 has no vfmv.f.s, so after #110144 the extract_vector_elt is lowered
via the integer domain and picks up the existing integer vmv.x.s fold
instead.
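For reference, the Log2SEW switch in RISCVInstrInfo::foldMemoryOperandImpl
now selects the scalar FP load opcode roughly as sketched below. This is a
sketch reconstructed from the diff, not verbatim source; the case 6 / FLD
arm and the enclosing control flow are assumptions based on the
pre-existing f32/f64 handling, which this patch does not show:

    // Inside RISCVInstrInfo::foldMemoryOperandImpl, when folding a
    // vfmv.f.s whose source vector is reloaded from a stack slot,
    // pick the scalar FP load matching the element width (SEW).
    unsigned Log2SEW =
        MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
    unsigned LoadOpc;
    switch (Log2SEW) {
    case 4:
      // f16: vfmv.f.s requires zvfh, and zvfh implies zfhmin, so FLH
      // is always legal here.
      LoadOpc = RISCV::FLH;
      break;
    case 5:
      LoadOpc = RISCV::FLW;
      break;
    case 6: // assumed: the existing f64 path uses FLD
      LoadOpc = RISCV::FLD;
      break;
    default:
      return nullptr;
    }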
Added:
Modified:
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 529944044f02d2..91503bd7f41f3c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -804,8 +804,8 @@ MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
switch (Log2SEW) {
case 4:
- // TODO: Support f16/bf16
- return nullptr;
+ LoadOpc = RISCV::FLH;
+ break;
case 5:
LoadOpc = RISCV::FLW;
break;
diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
index ffe6ff8a91abdb..0b218bbc6fd963 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stack-folding.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV32 %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV64 %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfbfmin,+zvfh,+zvfbfmin -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV32,ZFMIN %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfbfmin,+zvfh,+zvfbfmin -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV64,ZFMIN %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV32,NOZFMIN %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs | FileCheck --check-prefixes=CHECK,RV64,NOZFMIN %s
define i64 @i64(<vscale x 1 x i64> %v, i1 %c) {
; RV32-LABEL: i64:
@@ -241,3 +243,132 @@ falsebb:
ret float 0.0
}
+define half @f16(<vscale x 1 x half> %v, i1 %c) {
+; ZFMIN-LABEL: f16:
+; ZFMIN: # %bb.0:
+; ZFMIN-NEXT: addi sp, sp, -16
+; ZFMIN-NEXT: .cfi_def_cfa_offset 16
+; ZFMIN-NEXT: csrr a1, vlenb
+; ZFMIN-NEXT: slli a1, a1, 1
+; ZFMIN-NEXT: sub sp, sp, a1
+; ZFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; ZFMIN-NEXT: addi a1, sp, 16
+; ZFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; ZFMIN-NEXT: andi a0, a0, 1
+; ZFMIN-NEXT: #APP
+; ZFMIN-NEXT: #NO_APP
+; ZFMIN-NEXT: beqz a0, .LBB6_2
+; ZFMIN-NEXT: # %bb.1: # %truebb
+; ZFMIN-NEXT: flh fa0, 16(sp) # 8-byte Folded Reload
+; ZFMIN-NEXT: j .LBB6_3
+; ZFMIN-NEXT: .LBB6_2: # %falsebb
+; ZFMIN-NEXT: fmv.h.x fa0, zero
+; ZFMIN-NEXT: .LBB6_3: # %falsebb
+; ZFMIN-NEXT: csrr a0, vlenb
+; ZFMIN-NEXT: slli a0, a0, 1
+; ZFMIN-NEXT: add sp, sp, a0
+; ZFMIN-NEXT: addi sp, sp, 16
+; ZFMIN-NEXT: ret
+;
+; NOZFMIN-LABEL: f16:
+; NOZFMIN: # %bb.0:
+; NOZFMIN-NEXT: addi sp, sp, -16
+; NOZFMIN-NEXT: .cfi_def_cfa_offset 16
+; NOZFMIN-NEXT: csrr a1, vlenb
+; NOZFMIN-NEXT: slli a1, a1, 1
+; NOZFMIN-NEXT: sub sp, sp, a1
+; NOZFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; NOZFMIN-NEXT: addi a1, sp, 16
+; NOZFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; NOZFMIN-NEXT: andi a0, a0, 1
+; NOZFMIN-NEXT: #APP
+; NOZFMIN-NEXT: #NO_APP
+; NOZFMIN-NEXT: beqz a0, .LBB6_2
+; NOZFMIN-NEXT: # %bb.1: # %truebb
+; NOZFMIN-NEXT: lh a0, 16(sp) # 8-byte Folded Reload
+; NOZFMIN-NEXT: lui a1, 1048560
+; NOZFMIN-NEXT: or a0, a0, a1
+; NOZFMIN-NEXT: j .LBB6_3
+; NOZFMIN-NEXT: .LBB6_2: # %falsebb
+; NOZFMIN-NEXT: lui a0, 1048560
+; NOZFMIN-NEXT: .LBB6_3: # %falsebb
+; NOZFMIN-NEXT: fmv.w.x fa0, a0
+; NOZFMIN-NEXT: csrr a0, vlenb
+; NOZFMIN-NEXT: slli a0, a0, 1
+; NOZFMIN-NEXT: add sp, sp, a0
+; NOZFMIN-NEXT: addi sp, sp, 16
+; NOZFMIN-NEXT: ret
+ tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ br i1 %c, label %truebb, label %falsebb
+truebb:
+ %x = extractelement <vscale x 1 x half> %v, i32 0
+ ret half %x
+falsebb:
+ ret half 0.0
+}
+
+define bfloat @bf16(<vscale x 2 x bfloat> %v, i1 %c) {
+; ZFMIN-LABEL: bf16:
+; ZFMIN: # %bb.0:
+; ZFMIN-NEXT: addi sp, sp, -16
+; ZFMIN-NEXT: .cfi_def_cfa_offset 16
+; ZFMIN-NEXT: csrr a1, vlenb
+; ZFMIN-NEXT: slli a1, a1, 1
+; ZFMIN-NEXT: sub sp, sp, a1
+; ZFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; ZFMIN-NEXT: addi a1, sp, 16
+; ZFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; ZFMIN-NEXT: andi a0, a0, 1
+; ZFMIN-NEXT: #APP
+; ZFMIN-NEXT: #NO_APP
+; ZFMIN-NEXT: beqz a0, .LBB7_2
+; ZFMIN-NEXT: # %bb.1: # %truebb
+; ZFMIN-NEXT: lh a0, 16(sp) # 8-byte Folded Reload
+; ZFMIN-NEXT: fmv.h.x fa0, a0
+; ZFMIN-NEXT: j .LBB7_3
+; ZFMIN-NEXT: .LBB7_2: # %falsebb
+; ZFMIN-NEXT: fmv.h.x fa0, zero
+; ZFMIN-NEXT: .LBB7_3: # %falsebb
+; ZFMIN-NEXT: csrr a0, vlenb
+; ZFMIN-NEXT: slli a0, a0, 1
+; ZFMIN-NEXT: add sp, sp, a0
+; ZFMIN-NEXT: addi sp, sp, 16
+; ZFMIN-NEXT: ret
+;
+; NOZFMIN-LABEL: bf16:
+; NOZFMIN: # %bb.0:
+; NOZFMIN-NEXT: addi sp, sp, -16
+; NOZFMIN-NEXT: .cfi_def_cfa_offset 16
+; NOZFMIN-NEXT: csrr a1, vlenb
+; NOZFMIN-NEXT: slli a1, a1, 1
+; NOZFMIN-NEXT: sub sp, sp, a1
+; NOZFMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; NOZFMIN-NEXT: addi a1, sp, 16
+; NOZFMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; NOZFMIN-NEXT: andi a0, a0, 1
+; NOZFMIN-NEXT: #APP
+; NOZFMIN-NEXT: #NO_APP
+; NOZFMIN-NEXT: beqz a0, .LBB7_2
+; NOZFMIN-NEXT: # %bb.1: # %truebb
+; NOZFMIN-NEXT: lh a0, 16(sp) # 8-byte Folded Reload
+; NOZFMIN-NEXT: lui a1, 1048560
+; NOZFMIN-NEXT: or a0, a0, a1
+; NOZFMIN-NEXT: j .LBB7_3
+; NOZFMIN-NEXT: .LBB7_2: # %falsebb
+; NOZFMIN-NEXT: lui a0, 1048560
+; NOZFMIN-NEXT: .LBB7_3: # %falsebb
+; NOZFMIN-NEXT: fmv.w.x fa0, a0
+; NOZFMIN-NEXT: csrr a0, vlenb
+; NOZFMIN-NEXT: slli a0, a0, 1
+; NOZFMIN-NEXT: add sp, sp, a0
+; NOZFMIN-NEXT: addi sp, sp, 16
+; NOZFMIN-NEXT: ret
+ tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ br i1 %c, label %truebb, label %falsebb
+truebb:
+ %x = extractelement <vscale x 2 x bfloat> %v, i32 0
+ ret bfloat %x
+falsebb:
+ ret bfloat 0.0
+}
+
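A note on the NOZFMIN output above: without zfhmin/zfbfmin there is no
16-bit scalar FP load, so the element is reloaded with an integer lh and
then NaN-boxed by or-ing in 0xffff0000 (lui a1, 1048560 materializes
0xffff0000) before being moved into fa0 with fmv.w.x, since values
narrower than FLEN must be NaN-boxed in FP registers. In falsebb, lui a0,
1048560 alone produces the NaN-boxed encoding of +0.0.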