[llvm] 748ae69 - [RISCV] Add fastcc support for bf16 with Zfbfmin.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 6 16:50:54 PDT 2024
Author: Craig Topper
Date: 2024-09-06T16:50:25-07:00
New Revision: 748ae69b5d0ed9205e9da5121bd780d283aab164
URL: https://github.com/llvm/llvm-project/commit/748ae69b5d0ed9205e9da5121bd780d283aab164
DIFF: https://github.com/llvm/llvm-project/commit/748ae69b5d0ed9205e9da5121bd780d283aab164.diff
LOG: [RISCV] Add fastcc support for bf16 with Zfbfmin.
Added:
llvm/test/CodeGen/RISCV/fastcc-bf16.ll
Modified:
llvm/lib/Target/RISCV/RISCVCallingConv.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
index 48c2e526dd845e..c1a5620ff351da 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
@@ -531,7 +531,8 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
- if (LocVT == MVT::f16 && Subtarget.hasStdExtZfhmin()) {
+ if ((LocVT == MVT::f16 && Subtarget.hasStdExtZfhmin()) ||
+ (LocVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())) {
static const MCPhysReg FPR16List[] = {
RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
@@ -584,7 +585,7 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
- if (LocVT == MVT::f16) {
+ if (LocVT == MVT::f16 || LocVT == MVT::bf16) {
unsigned Offset2 = State.AllocateStack(2, Align(2));
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
return false;
diff --git a/llvm/test/CodeGen/RISCV/fastcc-bf16.ll b/llvm/test/CodeGen/RISCV/fastcc-bf16.ll
new file mode 100644
index 00000000000000..493bc63992547e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fastcc-bf16.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+zfbfmin -target-abi=ilp32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s
+
+define fastcc bfloat @callee(<32 x bfloat> %A) nounwind {
+; CHECK-LABEL: callee:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: ret
+ %B = extractelement <32 x bfloat> %A, i32 0
+ ret bfloat %B
+}
+
+; With fastcc, arguments are passed in fa0-fa7 and ft0-ft11.
+; The remaining arguments are passed on the stack.
+define bfloat @caller(<32 x bfloat> %A) nounwind {
+; CHECK-LABEL: caller:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -32
+; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: fmv.h.x fa1, a1
+; CHECK-NEXT: fmv.h.x fa2, a2
+; CHECK-NEXT: fmv.h.x fa3, a3
+; CHECK-NEXT: fmv.h.x fa4, a4
+; CHECK-NEXT: flh ft0, 32(sp)
+; CHECK-NEXT: flh ft1, 36(sp)
+; CHECK-NEXT: flh ft2, 40(sp)
+; CHECK-NEXT: flh ft3, 44(sp)
+; CHECK-NEXT: flh ft4, 48(sp)
+; CHECK-NEXT: flh ft5, 52(sp)
+; CHECK-NEXT: flh ft6, 56(sp)
+; CHECK-NEXT: flh ft7, 60(sp)
+; CHECK-NEXT: flh ft8, 64(sp)
+; CHECK-NEXT: flh ft9, 68(sp)
+; CHECK-NEXT: flh ft10, 72(sp)
+; CHECK-NEXT: flh ft11, 76(sp)
+; CHECK-NEXT: flh fs0, 80(sp)
+; CHECK-NEXT: flh fs1, 84(sp)
+; CHECK-NEXT: flh fs2, 88(sp)
+; CHECK-NEXT: flh fs3, 92(sp)
+; CHECK-NEXT: flh fs4, 96(sp)
+; CHECK-NEXT: flh fs5, 100(sp)
+; CHECK-NEXT: flh fs6, 104(sp)
+; CHECK-NEXT: flh fs7, 108(sp)
+; CHECK-NEXT: flh fs8, 112(sp)
+; CHECK-NEXT: flh fs9, 116(sp)
+; CHECK-NEXT: flh fs10, 120(sp)
+; CHECK-NEXT: flh fs11, 124(sp)
+; CHECK-NEXT: fmv.h.x fa5, a5
+; CHECK-NEXT: fmv.h.x fa6, a6
+; CHECK-NEXT: fmv.h.x fa7, a7
+; CHECK-NEXT: fsh fs11, 22(sp)
+; CHECK-NEXT: fsh fs10, 20(sp)
+; CHECK-NEXT: fsh fs9, 18(sp)
+; CHECK-NEXT: fsh fs8, 16(sp)
+; CHECK-NEXT: fsh fs7, 14(sp)
+; CHECK-NEXT: fsh fs6, 12(sp)
+; CHECK-NEXT: fsh fs5, 10(sp)
+; CHECK-NEXT: fsh fs4, 8(sp)
+; CHECK-NEXT: fsh fs3, 6(sp)
+; CHECK-NEXT: fsh fs2, 4(sp)
+; CHECK-NEXT: fsh fs1, 2(sp)
+; CHECK-NEXT: fsh fs0, 0(sp)
+; CHECK-NEXT: call callee
+; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 32
+; CHECK-NEXT: ret
+ %C = call fastcc bfloat @callee(<32 x bfloat> %A)
+ ret bfloat %C
+}
More information about the llvm-commits mailing list