[llvm] [NVPTX] Add patterns for fma.relu.{f16|bf16} (PR #114977)
Hugh Delaney via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 5 11:48:17 PST 2024
================
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -march=nvptx64 --enable-unsafe-fp-math -mcpu=sm_80 -mattr=ptx70 -verify-machineinstrs -fp-contract=fast -nvptx-fma-level=2 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=ptx70 -verify-machineinstrs -fp-contract=fast -nvptx-fma-level=2 | %ptxas-verify -arch=sm_80 %}
+
+; Test case: the result of an llvm.fma.f16 call is clamped at zero with an
+; fcmp ogt + select against 0.0. The autogenerated FileCheck assertions below
+; expect this to be emitted as a single fma.rn.relu instruction between the
+; three parameter loads and the return-value store.
+define half @fma_f16(half %a, half %b, half %c) {
+; CHECK-LABEL: fma_f16(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [fma_f16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [fma_f16_param_1];
+; CHECK-NEXT: ld.param.b16 %rs3, [fma_f16_param_2];
+; CHECK-NEXT: fma.rn.relu %rs4, %rs1, %rs2, %rs3;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs4;
+; CHECK-NEXT: ret;
+ %1 = call half @llvm.fma.f16(half %a, half %b, half %c)
+ ; Clamp: keep %1 when the ordered compare finds it greater than 0.0,
+ ; otherwise yield 0.0.
+ %2 = fcmp ogt half %1, 0.0
+ %3 = select i1 %2, half %1, half 0.0
+ ret half %3
+}
+
+define half @fma_f16_expanded(half %a, half %b, half %c) {
+; CHECK-LABEL: fma_f16_expanded(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [fma_f16_expanded_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [fma_f16_expanded_param_1];
+; CHECK-NEXT: ld.param.b16 %rs3, [fma_f16_expanded_param_2];
+; CHECK-NEXT: fma.rn.relu %rs4, %rs1, %rs2, %rs3;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs4;
+; CHECK-NEXT: ret;
+ %1 = fmul half %a, %b
+ %2 = fadd half %1, %c
----------------
hdelan wrote:
I've added more tests to cover these cases.
https://github.com/llvm/llvm-project/pull/114977
More information about the llvm-commits mailing list