[llvm] [NVPTX][NFC] Regenerate some test checks (PR #116605)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 18 03:45:16 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Fraser Cormack (frasercrmck)
<details>
<summary>Changes</summary>
Use update_llc_test_checks.py to autogenerate the CHECK lines in several test files where I was observing changes locally.
---
Patch is 256.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116605.diff
5 Files Affected:
- (modified) llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll (+37-24)
- (modified) llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll (+683-344)
- (modified) llvm/test/CodeGen/NVPTX/f16x2-instructions.ll (+1870-988)
- (modified) llvm/test/CodeGen/NVPTX/i16x2-instructions.ll (+701-409)
- (modified) llvm/test/CodeGen/NVPTX/i8x2-instructions.ll (+27-14)
``````````diff
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
index a53c90ac6db8b6..3e54aaf5580729 100644
--- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | FileCheck --check-prefixes=CHECK %s
; RUN: %if ptxas-11.8 %{ llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx71 --enable-unsafe-fp-math | %ptxas-verify -arch=sm_80 %}
@@ -6,36 +7,48 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
declare <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a) #0
declare <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a) #0
-; CHECK-LABEL: test_sin(
-; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_sin_param_0];
-; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
-; CHECK-DAG: cvt.f32.bf16 [[AF0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.f32.bf16 [[AF1:%f[0-9]+]], [[A1]];
-; CHECK-DAG: sin.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
-; CHECK-DAG: sin.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
-; CHECK-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0], [[R]];
-; CHECK: ret;
define <2 x bfloat> @test_sin(<2 x bfloat> %a) #0 #1 {
+; CHECK-LABEL: test_sin(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_sin_param_0];
+; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
+; CHECK-NEXT: cvt.f32.bf16 %f1, %rs2;
+; CHECK-NEXT: sin.approx.f32 %f2, %f1;
+; CHECK-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
+; CHECK-NEXT: cvt.f32.bf16 %f3, %rs1;
+; CHECK-NEXT: sin.approx.f32 %f4, %f3;
+; CHECK-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
+; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
%r = call <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a)
ret <2 x bfloat> %r
}
-; CHECK-LABEL: test_cos(
-; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_cos_param_0];
-; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
-; CHECK-DAG: cvt.f32.bf16 [[AF0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.f32.bf16 [[AF1:%f[0-9]+]], [[A1]];
-; CHECK-DAG: cos.approx.f32 [[RF0:%f[0-9]+]], [[AF0]];
-; CHECK-DAG: cos.approx.f32 [[RF1:%f[0-9]+]], [[AF1]];
-; CHECK-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[RF0]];
-; CHECK-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[RF1]];
-; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
-; CHECK: st.param.b32 [func_retval0], [[R]];
-; CHECK: ret;
define <2 x bfloat> @test_cos(<2 x bfloat> %a) #0 #1 {
+; CHECK-LABEL: test_cos(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<5>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .f32 %f<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_cos_param_0];
+; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
+; CHECK-NEXT: cvt.f32.bf16 %f1, %rs2;
+; CHECK-NEXT: cos.approx.f32 %f2, %f1;
+; CHECK-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
+; CHECK-NEXT: cvt.f32.bf16 %f3, %rs1;
+; CHECK-NEXT: cos.approx.f32 %f4, %f3;
+; CHECK-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
+; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
%r = call <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a)
ret <2 x bfloat> %r
}
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
index 925ae4245a4c20..e545d4c1177915 100644
--- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx71 | FileCheck --check-prefixes=CHECK,SM80 %s
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx78 | FileCheck --check-prefixes=CHECK,SM90 %s
; RUN: %if ptxas-11.8 %{ llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx71 | %ptxas-verify -arch=sm_80 %}
@@ -5,163 +6,231 @@
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-; CHECK-LABEL: test_ret_const(
-; CHECK: mov.b32 [[T:%r[0-9+]]], 1073758080;
-; CHECK: st.param.b32 [func_retval0], [[T]];
-; CHECK-NEXT: ret;
-
define <2 x bfloat> @test_ret_const() #0 {
+; CHECK-LABEL: test_ret_const(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: mov.b32 %r1, 1073758080;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
ret <2 x bfloat> <bfloat 1.0, bfloat 2.0>
}
; Check that we can lower fadd with immediate arguments.
-; CHECK-LABEL: test_fadd_imm_0(
-; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fadd_imm_0_param_0];
-;
-; SM90-DAG: mov.b32 [[I:%r[0-9+]]], 1073758080;
-; SM90-DAG: add.rn.bf16x2 [[R:%r[0-9]+]], [[A]], [[I]];
-;
-; SM80-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
-; SM80-DAG: cvt.f32.bf16 [[FA0:%f[0-9]+]], [[A0]]
-; SM80-DAG: cvt.f32.bf16 [[FA1:%f[0-9]+]], [[A1]]
-; SM80-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000;
-; SM80-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000;
-; SM80-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[FR0]]
-; SM80-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[FR1]]
-; SM80-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
-;
-; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
-; CHECK-NEXT: ret;
-
define <2 x bfloat> @test_fadd_imm_0(<2 x bfloat> %a) #0 {
+; SM80-LABEL: test_fadd_imm_0(
+; SM80: {
+; SM80-NEXT: .reg .b16 %rs<5>;
+; SM80-NEXT: .reg .b32 %r<3>;
+; SM80-NEXT: .reg .f32 %f<5>;
+; SM80-EMPTY:
+; SM80-NEXT: // %bb.0:
+; SM80-NEXT: ld.param.b32 %r1, [test_fadd_imm_0_param_0];
+; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r1;
+; SM80-NEXT: cvt.f32.bf16 %f1, %rs2;
+; SM80-NEXT: add.rn.f32 %f2, %f1, 0f40000000;
+; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
+; SM80-NEXT: cvt.f32.bf16 %f3, %rs1;
+; SM80-NEXT: add.rn.f32 %f4, %f3, 0f3F800000;
+; SM80-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
+; SM80-NEXT: mov.b32 %r2, {%rs4, %rs3};
+; SM80-NEXT: st.param.b32 [func_retval0], %r2;
+; SM80-NEXT: ret;
+;
+; SM90-LABEL: test_fadd_imm_0(
+; SM90: {
+; SM90-NEXT: .reg .b32 %r<4>;
+; SM90-EMPTY:
+; SM90-NEXT: // %bb.0:
+; SM90-NEXT: ld.param.b32 %r1, [test_fadd_imm_0_param_0];
+; SM90-NEXT: mov.b32 %r2, 1073758080;
+; SM90-NEXT: add.rn.bf16x2 %r3, %r1, %r2;
+; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: ret;
%r = fadd <2 x bfloat> <bfloat 1.0, bfloat 2.0>, %a
ret <2 x bfloat> %r
}
-; CHECK-LABEL: test_fadd_imm_1(
-; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fadd_imm_1_param_0];
-; SM90: mov.b16 [[B:%rs[0-9]+]], 0x3F80;
-; SM90: add.rn.bf16 [[R:%rs[0-9]+]], [[A]], [[B]];
-
-; SM80-DAG: cvt.f32.bf16 [[FA:%f[0-9]+]], [[A]];
-; SM80: add.rn.f32 [[FR:%f[0-9]+]], [[FA]], 0f3F800000;
-; SM80: cvt.rn.bf16.f32 [[R:%rs[0-9]+]], [[FR]];
-
-; CHECK: st.param.b16 [func_retval0], [[R]];
-; CHECK-NEXT: ret;
-
define bfloat @test_fadd_imm_1(bfloat %a) #0 {
+; SM80-LABEL: test_fadd_imm_1(
+; SM80: {
+; SM80-NEXT: .reg .b16 %rs<3>;
+; SM80-NEXT: .reg .f32 %f<3>;
+; SM80-EMPTY:
+; SM80-NEXT: // %bb.0:
+; SM80-NEXT: ld.param.b16 %rs1, [test_fadd_imm_1_param_0];
+; SM80-NEXT: cvt.f32.bf16 %f1, %rs1;
+; SM80-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
+; SM80-NEXT: cvt.rn.bf16.f32 %rs2, %f2;
+; SM80-NEXT: st.param.b16 [func_retval0], %rs2;
+; SM80-NEXT: ret;
+;
+; SM90-LABEL: test_fadd_imm_1(
+; SM90: {
+; SM90-NEXT: .reg .b16 %rs<4>;
+; SM90-EMPTY:
+; SM90-NEXT: // %bb.0:
+; SM90-NEXT: ld.param.b16 %rs1, [test_fadd_imm_1_param_0];
+; SM90-NEXT: mov.b16 %rs2, 0x3F80;
+; SM90-NEXT: add.rn.bf16 %rs3, %rs1, %rs2;
+; SM90-NEXT: st.param.b16 [func_retval0], %rs3;
+; SM90-NEXT: ret;
%r = fadd bfloat %a, 1.0
ret bfloat %r
}
-; CHECK-LABEL: test_fsubx2(
-; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fsubx2_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fsubx2_param_1];
-; SM90: sub.rn.bf16x2 [[R:%r[0-9]+]], [[A]], [[B]];
-
-; SM80-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]];
-; SM80-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]];
-; SM80-DAG: cvt.f32.bf16 [[FA1:%f[0-9]+]], [[A1]];
-; SM80-DAG: cvt.f32.bf16 [[FA0:%f[0-9]+]], [[A0]];
-; SM80-DAG: cvt.f32.bf16 [[FB0:%f[0-9]+]], [[B0]];
-; SM80-DAG: cvt.f32.bf16 [[FB1:%f[0-9]+]], [[B1]];
-; SM80-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
-; SM80-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
-; SM80-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[FR0]];
-; SM80-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[FR1]];
-; SM80: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]};
-
-; CHECK: st.param.b32 [func_retval0], [[R]];
-; CHECK: ret;
-
define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
+; SM80-LABEL: test_fsubx2(
+; SM80: {
+; SM80-NEXT: .reg .b16 %rs<7>;
+; SM80-NEXT: .reg .b32 %r<4>;
+; SM80-NEXT: .reg .f32 %f<7>;
+; SM80-EMPTY:
+; SM80-NEXT: // %bb.0:
+; SM80-NEXT: ld.param.b32 %r1, [test_fsubx2_param_0];
+; SM80-NEXT: ld.param.b32 %r2, [test_fsubx2_param_1];
+; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
+; SM80-NEXT: cvt.f32.bf16 %f1, %rs2;
+; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
+; SM80-NEXT: cvt.f32.bf16 %f2, %rs4;
+; SM80-NEXT: sub.rn.f32 %f3, %f2, %f1;
+; SM80-NEXT: cvt.rn.bf16.f32 %rs5, %f3;
+; SM80-NEXT: cvt.f32.bf16 %f4, %rs1;
+; SM80-NEXT: cvt.f32.bf16 %f5, %rs3;
+; SM80-NEXT: sub.rn.f32 %f6, %f5, %f4;
+; SM80-NEXT: cvt.rn.bf16.f32 %rs6, %f6;
+; SM80-NEXT: mov.b32 %r3, {%rs6, %rs5};
+; SM80-NEXT: st.param.b32 [func_retval0], %r3;
+; SM80-NEXT: ret;
+;
+; SM90-LABEL: test_fsubx2(
+; SM90: {
+; SM90-NEXT: .reg .b32 %r<4>;
+; SM90-EMPTY:
+; SM90-NEXT: // %bb.0:
+; SM90-NEXT: ld.param.b32 %r1, [test_fsubx2_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [test_fsubx2_param_0];
+; SM90-NEXT: sub.rn.bf16x2 %r3, %r2, %r1;
+; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: ret;
%r = fsub <2 x bfloat> %a, %b
ret <2 x bfloat> %r
}
-; CHECK-LABEL: test_fmulx2(
-; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fmulx2_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fmulx2_param_1];
-; SM90: mul.rn.bf16x2 [[R:%r[0-9]+]], [[A]], [[B]];
-
-; SM80-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]];
-; SM80-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]];
-; SM80-DAG: cvt.f32.bf16 [[FA1:%f[0-9]+]], [[A1]];
-; SM80-DAG: cvt.f32.bf16 [[FA0:%f[0-9]+]], [[A0]];
-; SM80-DAG: cvt.f32.bf16 [[FB0:%f[0-9]+]], [[B0]];
-; SM80-DAG: cvt.f32.bf16 [[FB1:%f[0-9]+]], [[B1]];
-; SM80-DAG: mul.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
-; SM80-DAG: mul.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
-; SM80-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[FR0]];
-; SM80-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[FR1]];
-; SM80: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]};
-
-; CHECK: st.param.b32 [func_retval0], [[R]];
-; CHECK: ret;
-
define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
+; SM80-LABEL: test_fmulx2(
+; SM80: {
+; SM80-NEXT: .reg .b16 %rs<7>;
+; SM80-NEXT: .reg .b32 %r<4>;
+; SM80-NEXT: .reg .f32 %f<7>;
+; SM80-EMPTY:
+; SM80-NEXT: // %bb.0:
+; SM80-NEXT: ld.param.b32 %r1, [test_fmulx2_param_0];
+; SM80-NEXT: ld.param.b32 %r2, [test_fmulx2_param_1];
+; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
+; SM80-NEXT: cvt.f32.bf16 %f1, %rs2;
+; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
+; SM80-NEXT: cvt.f32.bf16 %f2, %rs4;
+; SM80-NEXT: mul.rn.f32 %f3, %f2, %f1;
+; SM80-NEXT: cvt.rn.bf16.f32 %rs5, %f3;
+; SM80-NEXT: cvt.f32.bf16 %f4, %rs1;
+; SM80-NEXT: cvt.f32.bf16 %f5, %rs3;
+; SM80-NEXT: mul.rn.f32 %f6, %f5, %f4;
+; SM80-NEXT: cvt.rn.bf16.f32 %rs6, %f6;
+; SM80-NEXT: mov.b32 %r3, {%rs6, %rs5};
+; SM80-NEXT: st.param.b32 [func_retval0], %r3;
+; SM80-NEXT: ret;
+;
+; SM90-LABEL: test_fmulx2(
+; SM90: {
+; SM90-NEXT: .reg .b32 %r<4>;
+; SM90-EMPTY:
+; SM90-NEXT: // %bb.0:
+; SM90-NEXT: ld.param.b32 %r1, [test_fmulx2_param_1];
+; SM90-NEXT: ld.param.b32 %r2, [test_fmulx2_param_0];
+; SM90-NEXT: mul.rn.bf16x2 %r3, %r2, %r1;
+; SM90-NEXT: st.param.b32 [func_retval0], %r3;
+; SM90-NEXT: ret;
%r = fmul <2 x bfloat> %a, %b
ret <2 x bfloat> %r
}
-; CHECK-LABEL: test_fdiv(
-; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fdiv_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fdiv_param_1];
-; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]
-; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]]
-; CHECK-DAG: cvt.f32.bf16 [[FA0:%f[0-9]+]], [[A0]];
-; CHECK-DAG: cvt.f32.bf16 [[FA1:%f[0-9]+]], [[A1]];
-; CHECK-DAG: cvt.f32.bf16 [[FB0:%f[0-9]+]], [[B0]];
-; CHECK-DAG: cvt.f32.bf16 [[FB1:%f[0-9]+]], [[B1]];
-; CHECK-DAG: div.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]];
-; CHECK-DAG: div.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]];
-; CHECK-DAG: cvt.rn.bf16.f32 [[R0:%rs[0-9]+]], [[FR0]];
-; CHECK-DAG: cvt.rn.bf16.f32 [[R1:%rs[0-9]+]], [[FR1]];
-; CHECK-NEXT: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]}
-; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
-; CHECK-NEXT: ret;
-
define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
+; CHECK-LABEL: test_fdiv(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<7>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-NEXT: .reg .f32 %f<7>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [test_fdiv_param_1];
+; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2;
+; CHECK-NEXT: cvt.f32.bf16 %f1, %rs2;
+; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1;
+; CHECK-NEXT: cvt.f32.bf16 %f2, %rs4;
+; CHECK-NEXT: div.rn.f32 %f3, %f2, %f1;
+; CHECK-NEXT: cvt.rn.bf16.f32 %rs5, %f3;
+; CHECK-NEXT: cvt.f32.bf16 %f4, %rs1;
+; CHECK-NEXT: cvt.f32.bf16 %f5, %rs3;
+; CHECK-NEXT: div.rn.f32 %f6, %f5, %f4;
+; CHECK-NEXT: cvt.rn.bf16.f32 %rs6, %f6;
+; CHECK-NEXT: mov.b32 %r3, {%rs6, %rs5};
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%r = fdiv <2 x bfloat> %a, %b
ret <2 x bfloat> %r
}
-; CHECK-LABEL: test_fneg(
-; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_fneg_param_0];
-
-; CHECK-DAG: xor.b32 [[IHH0:%r[0-9]+]], [[A]], -2147450880;
-; CHECK-NEXT: st.param.b32 [func_retval0], [[IHH0]];
-; CHECK-NEXT: ret;
define <2 x bfloat> @test_fneg(<2 x bfloat> %a) #0 {
+; CHECK-LABEL: test_fneg(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u32 %r1, [test_fneg_param_0];
+; CHECK-NEXT: xor.b32 %r2, %r1, -2147450880;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
%r = fneg <2 x bfloat> %a
ret <2 x bfloat> %r
}
-; CHECK-LABEL: .func test_ldst_v2bf16(
-; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v2bf16_param_0];
-; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v2bf16_param_1];
-; CHECK-DAG: ld.b32 [[E:%r[0-9]+]], [%[[A]]]
-; CHECK-DAG: st.b32 [%[[B]]], [[E]];
-; CHECK: ret;
define void @test_ldst_v2bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: test_ldst_v2bf16(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [test_ldst_v2bf16_param_0];
+; CHECK-NEXT: ld.b32 %r1, [%rd1];
+; CHECK-NEXT: ld.param.u64 %rd2, [test_ldst_v2bf16_param_1];
+; CHECK-NEXT: st.b32 [%rd2], %r1;
+; CHECK-NEXT: ret;
%t1 = load <2 x bfloat>, ptr %a
store <2 x bfloat> %t1, ptr %b, align 16
ret void
}
-; CHECK-LABEL: .func test_ldst_v3bf16(
-; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v3bf16_param_0];
-; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v3bf16_param_1];
-; -- v3 is inconvenient to capture as it's lowered as ld.b64 + fair
-; number of bitshifting instructions that may change at llvm's whim.
-; So we only verify that we only issue correct number of writes using
-; correct offset, but not the values we write.
-; CHECK-DAG: ld.u64
-; CHECK-DAG: st.u32 [%[[B]]],
-; CHECK-DAG: st.b16 [%[[B]]+4],
-; CHECK: ret;
define void @test_ldst_v3bf16(ptr %a, ptr %b) {
+; CHECK-LABEL: test_ldst_v3bf16(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [test_ldst_v3bf16_param_0];
+; CHECK-NEXT: ld.u64 %rd2, [%rd1];
+; CHECK-NEXT: { .reg .b32 tmp; mov.b64 {tmp, %r1}, %rd2; }
+; CHECK-NEXT: ld.param.u64 %rd3, [test_ldst_v3bf16_param_1];
+; CHECK-NEXT: st.u32 [%rd3], %rd2;
+; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {%rs1, tmp}, %r1; }
+; CHECK-NEXT: st.b16 [%rd3+4], %rs1;
+; CHECK-NEXT: ret;
%t1 = load <3 x bfloat>, ptr %a
store <3 x bfloat> %t1, ptr %b, align 16
ret void
@@ -169,161 +238,241 @@ define void @test_ldst_v3bf16(ptr %a, ptr %b) {
declare <2 x bfloat> @test_callee(<2 x bfloat> %a, <2 x bfloat> %b) #0
-; CHECK-LABEL: test_call(
-; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_call_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_call_param_1];
-; CHECK: {
-; CHECK-DAG: .param .align 4 .b8 param0[4];
-; CHECK-DAG: .param .align 4 .b8 param1[4];
-; CHECK-DAG: st.param.b32 [param0], [[A]];
-; CHECK-DAG: st.param.b32 [param1], [[B]];
-; CHECK-DAG: .param .align 4 .b8 retval0[4];
-; CHECK: call.uni (retval0),
-; CHECK-NEXT: test_callee,
-; CHECK: );
-; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0];
-; CHECK-NEXT: }
-; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
-; CHECK-NEXT: ret;
-
define <2 x bfloat> @test_call(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
+; CHECK-LABEL: test_call(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test_call_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [test_call_param_1];
+; CHECK-NEXT: { // callseq 0, 0
+; CHECK-NEXT: .param .align 4 .b8 param0[4];
+; CHECK-NEXT: st.param.b32 [param0], %r1;
+; CHECK-NEXT: .param .align 4 .b8 param1[4];
+; CHECK-NEXT: st.param.b32 [param1], %r2;
+; CHECK-NEXT: .param .align 4 .b8 retval0[4];
+; CHECK-NEXT: call.uni (retval0),
+; CHECK-NEXT: test_callee,
+; CHECK-NEXT: (
+; CHECK-NEXT: param0,
+; CHECK-NEXT: param1
+; CHECK-NEXT: );
+; CHECK-NEXT: ld.param.b32 %r3, [retval0];
+; CHECK-NEXT: } // callseq 0
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%r = call <2 x bfloat> @test_callee(<2 x bfloat> %a, <2 x bfloat> %b)
ret <2 x bfloat> %r
}
-; CHECK-LABEL: test_select(
-; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_select_param_0];
-; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_select_param_1];
-; CHECK-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2]
-; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
-; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], [[A]], [[B]], [[PRED]];
-; CHECK-NEXT: st.param.b32 [func_retval0], [[R]];
-; CHECK-NEXT: ret;
-
define <2 x bfloat> @test_select(<2 x bfloat> %a, <2 x bfloat> %b, i1 zeroext %c) #0 {
+; CHECK-LABEL: test_select(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb....
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/116605
More information about the llvm-commits mailing list