[llvm] 934b490 - [RISCV] Expand load extension / truncate store for bf16
Jun Sha via llvm-commits
llvm-commits@lists.llvm.org
Mon Jul 31 20:10:24 PDT 2023
Author: Jun Sha (Joshua)
Date: 2023-08-01T11:10:41+08:00
New Revision: 934b4905306d40487d91c121c1d6d1f8ea52919a
URL: https://github.com/llvm/llvm-project/commit/934b4905306d40487d91c121c1d6d1f8ea52919a
DIFF: https://github.com/llvm/llvm-project/commit/934b4905306d40487d91c121c1d6d1f8ea52919a.diff
LOG: [RISCV] Expand load extension / truncate store for bf16
Currently, bf16 operations are supported by automatically promoting them to float. This patch completes that support by ensuring that bf16 load-extension and truncating-store operations are properly expanded.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D156646
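
As a sketch of the mechanism described in the log message: marking a (value type, memory type) pair as Expand tells the SelectionDAG legalizer that no single instruction performs the extending load or truncating store, so it splits the node into a plain bf16 load followed by an explicit extension, or a conversion followed by a plain bf16 store. The class below is hypothetical and for illustration only (the real calls live in the RISCVTargetLowering constructor, as the diff shows); setLoadExtAction/setTruncStoreAction are protected members of TargetLoweringBase and so must be called from a subclass.

#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

namespace {
// Hypothetical target lowering, for illustration only.
class DemoTargetLowering : public TargetLowering {
public:
  explicit DemoTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {
    // No instruction widens a bf16 in memory directly to f32/f64, so an
    // extending load must be split into a bf16 load plus an fpext.
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
    // Likewise, a truncating store becomes an fptrunc (lowered via the
    // __truncsfbf2/__truncdfbf2 libcalls) followed by a plain bf16 store.
    setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
  }
};
} // end anonymous namespace

Because bf16 occupies the upper half of an f32, the expanded extension needs no libcall: as the new test checks, fpext of a loaded bf16 lowers to lhu + slli 16 + fmv.w.x, while the truncating direction goes through __truncsfbf2/__truncdfbf2.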
Added:
llvm/test/CodeGen/RISCV/bf16-promote.ll
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 66eb99060264ea..1b86c53c4dfd9f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -446,6 +446,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(FPOpToExpand, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
setOperationAction(ISD::FP_TO_BF16, MVT::f32,
@@ -488,6 +490,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(FPOpToExpand, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_BF16, MVT::f64,
diff --git a/llvm/test/CodeGen/RISCV/bf16-promote.ll b/llvm/test/CodeGen/RISCV/bf16-promote.ll
new file mode 100644
index 00000000000000..c8fc84729da79e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/bf16-promote.ll
@@ -0,0 +1,154 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64d < %s | FileCheck --check-prefixes=CHECK,RV64 %s
+; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32d < %s | FileCheck --check-prefixes=CHECK,RV32 %s
+
+define void @test_load_store(ptr %p, ptr %q) nounwind {
+; CHECK-LABEL: test_load_store:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lh a0, 0(a0)
+; CHECK-NEXT: sh a0, 0(a1)
+; CHECK-NEXT: ret
+ %a = load bfloat, ptr %p
+ store bfloat %a, ptr %q
+ ret void
+}
+
+define float @test_fpextend_float(ptr %p) nounwind {
+; CHECK-LABEL: test_fpextend_float:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lhu a0, 0(a0)
+; CHECK-NEXT: slli a0, a0, 16
+; CHECK-NEXT: fmv.w.x fa0, a0
+; CHECK-NEXT: ret
+ %a = load bfloat, ptr %p
+ %r = fpext bfloat %a to float
+ ret float %r
+}
+
+define double @test_fpextend_double(ptr %p) nounwind {
+; CHECK-LABEL: test_fpextend_double:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lhu a0, 0(a0)
+; CHECK-NEXT: slli a0, a0, 16
+; CHECK-NEXT: fmv.w.x fa5, a0
+; CHECK-NEXT: fcvt.d.s fa0, fa5
+; CHECK-NEXT: ret
+ %a = load bfloat, ptr %p
+ %r = fpext bfloat %a to double
+ ret double %r
+}
+
+define void @test_fptrunc_float(float %f, ptr %p) nounwind {
+; RV64-LABEL: test_fptrunc_float:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT: mv s0, a0
+; RV64-NEXT: call __truncsfbf2@plt
+; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: sh a0, 0(s0)
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_fptrunc_float:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv s0, a0
+; RV32-NEXT: call __truncsfbf2@plt
+; RV32-NEXT: fmv.x.w a0, fa0
+; RV32-NEXT: sh a0, 0(s0)
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+ %a = fptrunc float %f to bfloat
+ store bfloat %a, ptr %p
+ ret void
+}
+
+define void @test_fptrunc_double(double %d, ptr %p) nounwind {
+; RV64-LABEL: test_fptrunc_double:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT: mv s0, a0
+; RV64-NEXT: call __truncdfbf2@plt
+; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: sh a0, 0(s0)
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_fptrunc_double:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: mv s0, a0
+; RV32-NEXT: call __truncdfbf2@plt
+; RV32-NEXT: fmv.x.w a0, fa0
+; RV32-NEXT: sh a0, 0(s0)
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+ %a = fptrunc double %d to bfloat
+ store bfloat %a, ptr %p
+ ret void
+}
+
+define void @test_fadd(ptr %p, ptr %q) nounwind {
+; RV64-LABEL: test_fadd:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
+; RV64-NEXT: lhu a1, 0(a1)
+; RV64-NEXT: mv s0, a0
+; RV64-NEXT: lhu a0, 0(a0)
+; RV64-NEXT: slli a1, a1, 16
+; RV64-NEXT: fmv.w.x fa5, a1
+; RV64-NEXT: slli a0, a0, 16
+; RV64-NEXT: fmv.w.x fa4, a0
+; RV64-NEXT: fadd.s fa0, fa4, fa5
+; RV64-NEXT: call __truncsfbf2@plt
+; RV64-NEXT: fmv.x.w a0, fa0
+; RV64-NEXT: sh a0, 0(s0)
+; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_fadd:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32-NEXT: lhu a1, 0(a1)
+; RV32-NEXT: mv s0, a0
+; RV32-NEXT: lhu a0, 0(a0)
+; RV32-NEXT: slli a1, a1, 16
+; RV32-NEXT: fmv.w.x fa5, a1
+; RV32-NEXT: slli a0, a0, 16
+; RV32-NEXT: fmv.w.x fa4, a0
+; RV32-NEXT: fadd.s fa0, fa4, fa5
+; RV32-NEXT: call __truncsfbf2@plt
+; RV32-NEXT: fmv.x.w a0, fa0
+; RV32-NEXT: sh a0, 0(s0)
+; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+ %a = load bfloat, ptr %p
+ %b = load bfloat, ptr %q
+ %r = fadd bfloat %a, %b
+ store bfloat %r, ptr %p
+ ret void
+}
\ No newline at end of file