[llvm] 748ae69 - [RISCV] Add fastcc support for bf16 with Zfbfmin.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 6 16:50:54 PDT 2024


Author: Craig Topper
Date: 2024-09-06T16:50:25-07:00
New Revision: 748ae69b5d0ed9205e9da5121bd780d283aab164

URL: https://github.com/llvm/llvm-project/commit/748ae69b5d0ed9205e9da5121bd780d283aab164
DIFF: https://github.com/llvm/llvm-project/commit/748ae69b5d0ed9205e9da5121bd780d283aab164.diff

LOG: [RISCV] Add fastcc support for bf16 with Zfbfmin.

Added: 
    llvm/test/CodeGen/RISCV/fastcc-bf16.ll

Modified: 
    llvm/lib/Target/RISCV/RISCVCallingConv.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
index 48c2e526dd845e..c1a5620ff351da 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
@@ -531,7 +531,8 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
     }
   }
 
-  if (LocVT == MVT::f16 && Subtarget.hasStdExtZfhmin()) {
+  if ((LocVT == MVT::f16 && Subtarget.hasStdExtZfhmin()) ||
+      (LocVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())) {
     static const MCPhysReg FPR16List[] = {
         RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
         RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
@@ -584,7 +585,7 @@ bool llvm::CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
     }
   }
 
-  if (LocVT == MVT::f16) {
+  if (LocVT == MVT::f16 || LocVT == MVT::bf16) {
     unsigned Offset2 = State.AllocateStack(2, Align(2));
     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
     return false;

diff  --git a/llvm/test/CodeGen/RISCV/fastcc-bf16.ll b/llvm/test/CodeGen/RISCV/fastcc-bf16.ll
new file mode 100644
index 00000000000000..493bc63992547e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fastcc-bf16.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+zfbfmin -target-abi=ilp32 -verify-machineinstrs < %s \
+; RUN:     | FileCheck %s
+
+define fastcc bfloat @callee(<32 x bfloat> %A) nounwind {
+; CHECK-LABEL: callee:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.h a0, fa0
+; CHECK-NEXT:    ret
+	%B = extractelement <32 x bfloat> %A, i32 0
+	ret bfloat %B
+}
+
+; With fastcc, the first 20 bf16 elements are passed in fa0-fa7 and ft0-ft11.
+; The remaining 12 elements are passed on the stack.
+define bfloat @caller(<32 x bfloat> %A) nounwind {
+; CHECK-LABEL: caller:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -32
+; CHECK-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    fmv.h.x fa0, a0
+; CHECK-NEXT:    fmv.h.x fa1, a1
+; CHECK-NEXT:    fmv.h.x fa2, a2
+; CHECK-NEXT:    fmv.h.x fa3, a3
+; CHECK-NEXT:    fmv.h.x fa4, a4
+; CHECK-NEXT:    flh ft0, 32(sp)
+; CHECK-NEXT:    flh ft1, 36(sp)
+; CHECK-NEXT:    flh ft2, 40(sp)
+; CHECK-NEXT:    flh ft3, 44(sp)
+; CHECK-NEXT:    flh ft4, 48(sp)
+; CHECK-NEXT:    flh ft5, 52(sp)
+; CHECK-NEXT:    flh ft6, 56(sp)
+; CHECK-NEXT:    flh ft7, 60(sp)
+; CHECK-NEXT:    flh ft8, 64(sp)
+; CHECK-NEXT:    flh ft9, 68(sp)
+; CHECK-NEXT:    flh ft10, 72(sp)
+; CHECK-NEXT:    flh ft11, 76(sp)
+; CHECK-NEXT:    flh fs0, 80(sp)
+; CHECK-NEXT:    flh fs1, 84(sp)
+; CHECK-NEXT:    flh fs2, 88(sp)
+; CHECK-NEXT:    flh fs3, 92(sp)
+; CHECK-NEXT:    flh fs4, 96(sp)
+; CHECK-NEXT:    flh fs5, 100(sp)
+; CHECK-NEXT:    flh fs6, 104(sp)
+; CHECK-NEXT:    flh fs7, 108(sp)
+; CHECK-NEXT:    flh fs8, 112(sp)
+; CHECK-NEXT:    flh fs9, 116(sp)
+; CHECK-NEXT:    flh fs10, 120(sp)
+; CHECK-NEXT:    flh fs11, 124(sp)
+; CHECK-NEXT:    fmv.h.x fa5, a5
+; CHECK-NEXT:    fmv.h.x fa6, a6
+; CHECK-NEXT:    fmv.h.x fa7, a7
+; CHECK-NEXT:    fsh fs11, 22(sp)
+; CHECK-NEXT:    fsh fs10, 20(sp)
+; CHECK-NEXT:    fsh fs9, 18(sp)
+; CHECK-NEXT:    fsh fs8, 16(sp)
+; CHECK-NEXT:    fsh fs7, 14(sp)
+; CHECK-NEXT:    fsh fs6, 12(sp)
+; CHECK-NEXT:    fsh fs5, 10(sp)
+; CHECK-NEXT:    fsh fs4, 8(sp)
+; CHECK-NEXT:    fsh fs3, 6(sp)
+; CHECK-NEXT:    fsh fs2, 4(sp)
+; CHECK-NEXT:    fsh fs1, 2(sp)
+; CHECK-NEXT:    fsh fs0, 0(sp)
+; CHECK-NEXT:    call callee
+; CHECK-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 32
+; CHECK-NEXT:    ret
+	%C = call fastcc bfloat @callee(<32 x bfloat> %A)
+	ret bfloat %C
+}


        


More information about the llvm-commits mailing list