[llvm] [NFC][AMDGPU] Auto-generate check lines for some test cases (PR #112426)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 15 13:22:27 PDT 2024


https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/112426

From 700bdb69b2ff6d110122f268521c137d023461b2 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Tue, 15 Oct 2024 15:58:20 -0400
Subject: [PATCH] [NFC][AMDGPU] Auto-generate check lines for
 `llvm/test/CodeGen/AMDGPU/andorbitset.ll`

---
 llvm/test/CodeGen/AMDGPU/andorbitset.ll    | 103 +++++++++++---
 llvm/test/CodeGen/AMDGPU/andorxorinvimm.ll |  79 +++++++++--
 llvm/test/CodeGen/AMDGPU/fabs.f64.ll       | 155 ++++++++++++++++-----
 3 files changed, 277 insertions(+), 60 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/andorbitset.ll b/llvm/test/CodeGen/AMDGPU/andorbitset.ll
index a189ba9b103421..a04c46b0a805c4 100644
--- a/llvm/test/CodeGen/AMDGPU/andorbitset.ll
+++ b/llvm/test/CodeGen/AMDGPU/andorbitset.ll
@@ -1,48 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-; SI-LABEL: {{^}}s_clear_msb:
-; SI: s_bitset0_b32 s{{[0-9]+}}, 31
 define amdgpu_kernel void @s_clear_msb(ptr addrspace(1) %out, i32 %in) {
+; SI-LABEL: s_clear_msb:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s4, s[2:3], 0xb
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_bitset0_b32 s4, 31
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %x = and i32 %in, 2147483647
   store i32 %x, ptr addrspace(1) %out
   ret void
 }
 
-; SI-LABEL: {{^}}s_set_msb:
-; SI: s_bitset1_b32 s{{[0-9]+}}, 31
 define amdgpu_kernel void @s_set_msb(ptr addrspace(1) %out, i32 %in) {
+; SI-LABEL: s_set_msb:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s4, s[2:3], 0xb
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_bitset1_b32 s4, 31
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %x = or i32 %in, 2147483648
   store i32 %x, ptr addrspace(1) %out
   ret void
 }
 
-; SI-LABEL: {{^}}s_clear_lsb:
-; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, -2
 define amdgpu_kernel void @s_clear_lsb(ptr addrspace(1) %out, i32 %in) {
+; SI-LABEL: s_clear_lsb:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s4, s[2:3], 0xb
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_and_b32 s4, s4, -2
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %x = and i32 %in, 4294967294
   store i32 %x, ptr addrspace(1) %out
   ret void
 }
 
-; SI-LABEL: {{^}}s_set_lsb:
-; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1
 define amdgpu_kernel void @s_set_lsb(ptr addrspace(1) %out, i32 %in) {
+; SI-LABEL: s_set_lsb:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s4, s[2:3], 0xb
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_or_b32 s4, s4, 1
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %x = or i32 %in, 1
   store i32 %x, ptr addrspace(1) %out
   ret void
 }
 
-; SI-LABEL: {{^}}s_clear_midbit:
-; SI: s_bitset0_b32 s{{[0-9]+}}, 8
 define amdgpu_kernel void @s_clear_midbit(ptr addrspace(1) %out, i32 %in) {
+; SI-LABEL: s_clear_midbit:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s4, s[2:3], 0xb
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_bitset0_b32 s4, 8
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %x = and i32 %in, 4294967039
   store i32 %x, ptr addrspace(1) %out
   ret void
 }
 
-; SI-LABEL: {{^}}s_set_midbit:
-; SI: s_bitset1_b32 s{{[0-9]+}}, 8
 define amdgpu_kernel void @s_set_midbit(ptr addrspace(1) %out, i32 %in) {
+; SI-LABEL: s_set_midbit:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s4, s[2:3], 0xb
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_bitset1_b32 s4, 8
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %x = or i32 %in, 256
   store i32 %x, ptr addrspace(1) %out
   ret void
@@ -50,11 +105,27 @@ define amdgpu_kernel void @s_set_midbit(ptr addrspace(1) %out, i32 %in) {
 
 @gv = external addrspace(1) global i32
 
-; Make sure there's no verifier error with an undef source.
-; SI-LABEL: {{^}}bitset_verifier_error:
-; SI-NOT:   %bb.1:
-; SI:       s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7fffffff
 define void @bitset_verifier_error() local_unnamed_addr #0 {
+; SI-LABEL: bitset_verifier_error:
+; SI:       ; %bb.0: ; %bb
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_getpc_b64 s[4:5]
+; SI-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
+; SI-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_and_b32 s8, s4, 0x7fffffff
+; SI-NEXT:    v_mov_b32_e32 v0, s8
+; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT:    s_waitcnt expcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v0, 0x3f7fbe77
+; SI-NEXT:    v_cmp_ge_f32_e64 s[4:5], |s4|, v0
+; SI-NEXT:    s_and_b64 vcc, exec, s[4:5]
+; SI-NEXT:    s_cbranch_vccnz .LBB6_2
+; SI-NEXT:  ; %bb.1: ; %bb5
+; SI-NEXT:  .LBB6_2: ; %bb6
 bb:
   %i = call float @llvm.fabs.f32(float undef) #0
   %i1 = bitcast float %i to i32
diff --git a/llvm/test/CodeGen/AMDGPU/andorxorinvimm.ll b/llvm/test/CodeGen/AMDGPU/andorxorinvimm.ll
index dc158028bd7b05..4b56b5e9d24f5c 100644
--- a/llvm/test/CodeGen/AMDGPU/andorxorinvimm.ll
+++ b/llvm/test/CodeGen/AMDGPU/andorxorinvimm.ll
@@ -1,48 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-; SI-LABEL: {{^}}s_or_to_orn2:
-; SI: s_orn2_b32 s{{[0-9]+}}, s{{[0-9]+}}, 50
 define amdgpu_kernel void @s_or_to_orn2(ptr addrspace(1) %out, i32 %in) {
+; SI-LABEL: s_or_to_orn2:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s4, s[2:3], 0xb
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_orn2_b32 s4, s4, 50
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %x = or i32 %in, -51
   store i32 %x, ptr addrspace(1) %out
   ret void
 }
 
-; SI-LABEL: {{^}}s_or_to_orn2_imm0:
-; SI: s_orn2_b32 s{{[0-9]+}}, s{{[0-9]+}}, 50
 define amdgpu_kernel void @s_or_to_orn2_imm0(ptr addrspace(1) %out, i32 %in) {
+; SI-LABEL: s_or_to_orn2_imm0:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s4, s[2:3], 0xb
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_orn2_b32 s4, s4, 50
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %x = or i32 -51, %in
   store i32 %x, ptr addrspace(1) %out
   ret void
 }
 
-; SI-LABEL: {{^}}s_and_to_andn2:
-; SI: s_andn2_b32 s{{[0-9]+}}, s{{[0-9]+}}, 50
 define amdgpu_kernel void @s_and_to_andn2(ptr addrspace(1) %out, i32 %in) {
+; SI-LABEL: s_and_to_andn2:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s4, s[2:3], 0xb
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_andn2_b32 s4, s4, 50
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %x = and i32 %in, -51
   store i32 %x, ptr addrspace(1) %out
   ret void
 }
 
-; SI-LABEL: {{^}}s_and_to_andn2_imm0:
-; SI: s_andn2_b32 s{{[0-9]+}}, s{{[0-9]+}}, 50
 define amdgpu_kernel void @s_and_to_andn2_imm0(ptr addrspace(1) %out, i32 %in) {
+; SI-LABEL: s_and_to_andn2_imm0:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s4, s[2:3], 0xb
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_andn2_b32 s4, s4, 50
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %x = and i32 -51, %in
   store i32 %x, ptr addrspace(1) %out
   ret void
 }
 
-; SI-LABEL: {{^}}s_xor_to_xnor:
-; SI: s_xnor_b32 s{{[0-9]+}}, s{{[0-9]+}}, 50
 define amdgpu_kernel void @s_xor_to_xnor(ptr addrspace(1) %out, i32 %in) {
+; SI-LABEL: s_xor_to_xnor:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s4, s[2:3], 0xb
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_xnor_b32 s4, s4, 50
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %x = xor i32 %in, -51
   store i32 %x, ptr addrspace(1) %out
   ret void
 }
 
-; SI-LABEL: {{^}}s_xor_to_xnor_imm0:
-; SI: s_xnor_b32 s{{[0-9]+}}, s{{[0-9]+}}, 50
 define amdgpu_kernel void @s_xor_to_xnor_imm0(ptr addrspace(1) %out, i32 %in) {
+; SI-LABEL: s_xor_to_xnor_imm0:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s4, s[2:3], 0xb
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_xnor_b32 s4, s4, 50
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %x = xor i32 -51, %in
   store i32 %x, ptr addrspace(1) %out
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/fabs.f64.ll b/llvm/test/CodeGen/AMDGPU/fabs.f64.ll
index 32d5fa6e72d791..fb1d3e79ea29af 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.f64.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -7,10 +8,25 @@ declare double @llvm.fabs.f64(double) readnone
 declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone
 declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
 
-; FUNC-LABEL: {{^}}v_fabs_f64:
-; SI: v_and_b32
-; SI: s_endpgm
 define amdgpu_kernel void @v_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; SI-LABEL: v_fabs_f64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s10, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
+; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    s_mov_b32 s11, s7
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; SI-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[8:11], 0 addr64
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_mov_b32 s4, s0
+; SI-NEXT:    s_mov_b32 s5, s1
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
+; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %tidext = sext i32 %tid to i64
   %gep = getelementptr double, ptr addrspace(1) %in, i64 %tidext
@@ -20,77 +36,152 @@ define amdgpu_kernel void @v_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %i
   ret void
 }
 
-; FUNC-LABEL: {{^}}fabs_f64:
-; SI: s_bitset0_b32
-; SI: s_endpgm
 define amdgpu_kernel void @fabs_f64(ptr addrspace(1) %out, double %in) {
+; SI-LABEL: fabs_f64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_bitset0_b32 s3, 31
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_mov_b32 s4, s0
+; SI-NEXT:    s_mov_b32 s5, s1
+; SI-NEXT:    v_mov_b32_e32 v0, s2
+; SI-NEXT:    v_mov_b32_e32 v1, s3
+; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-NEXT:    s_endpgm
   %fabs = call double @llvm.fabs.f64(double %in)
   store double %fabs, ptr addrspace(1) %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}fabs_v2f64:
-; SI: s_bitset0_b32
-; SI: s_bitset0_b32
-; SI: s_endpgm
 define amdgpu_kernel void @fabs_v2f64(ptr addrspace(1) %out, <2 x double> %in) {
+; SI-LABEL: fabs_v2f64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0xd
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_bitset0_b32 s7, 31
+; SI-NEXT:    s_bitset0_b32 s5, 31
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    v_mov_b32_e32 v2, s6
+; SI-NEXT:    v_mov_b32_e32 v1, s5
+; SI-NEXT:    v_mov_b32_e32 v3, s7
+; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
   store <2 x double> %fabs, ptr addrspace(1) %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}fabs_v4f64:
-; SI: s_bitset0_b32
-; SI: s_bitset0_b32
-; SI: s_bitset0_b32
-; SI: s_bitset0_b32
-; SI: s_endpgm
 define amdgpu_kernel void @fabs_v4f64(ptr addrspace(1) %out, <4 x double> %in) {
+; SI-LABEL: fabs_v4f64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx8 s[4:11], s[2:3], 0x11
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_bitset0_b32 s7, 31
+; SI-NEXT:    s_bitset0_b32 s11, 31
+; SI-NEXT:    s_bitset0_b32 s9, 31
+; SI-NEXT:    s_bitset0_b32 s5, 31
+; SI-NEXT:    v_mov_b32_e32 v0, s8
+; SI-NEXT:    v_mov_b32_e32 v2, s10
+; SI-NEXT:    v_mov_b32_e32 v4, s4
+; SI-NEXT:    v_mov_b32_e32 v6, s6
+; SI-NEXT:    v_mov_b32_e32 v1, s9
+; SI-NEXT:    v_mov_b32_e32 v3, s11
+; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
+; SI-NEXT:    v_mov_b32_e32 v5, s5
+; SI-NEXT:    v_mov_b32_e32 v7, s7
+; SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
   store <4 x double> %fabs, ptr addrspace(1) %out
   ret void
 }
 
-; SI-LABEL: {{^}}fabs_fold_f64:
-; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
-; SI-NOT: and
-; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
-; SI: s_endpgm
 define amdgpu_kernel void @fabs_fold_f64(ptr addrspace(1) %out, [8 x i32], double %in0, [8 x i32], double %in1) {
+; SI-LABEL: fabs_fold_f64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x1d
+; SI-NEXT:    s_load_dwordx2 s[6:7], s[2:3], 0x13
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    v_mov_b32_e32 v1, s5
+; SI-NEXT:    v_mul_f64 v[0:1], |s[6:7]|, v[0:1]
+; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %fabs = call double @llvm.fabs.f64(double %in0)
   %fmul = fmul double %fabs, %in1
   store double %fmul, ptr addrspace(1) %out
   ret void
 }
 
-; SI-LABEL: {{^}}fabs_fn_fold_f64:
-; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
-; SI-NOT: and
-; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
-; SI: s_endpgm
 define amdgpu_kernel void @fabs_fn_fold_f64(ptr addrspace(1) %out, [8 x i32], double %in0, [8 x i32], double %in1) {
+; SI-LABEL: fabs_fn_fold_f64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x1d
+; SI-NEXT:    s_load_dwordx2 s[6:7], s[2:3], 0x13
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    v_mov_b32_e32 v1, s5
+; SI-NEXT:    v_mul_f64 v[0:1], |s[6:7]|, v[0:1]
+; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-NEXT:    s_endpgm
   %fabs = call double @fabs(double %in0)
   %fmul = fmul double %fabs, %in1
   store double %fmul, ptr addrspace(1) %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}fabs_free_f64:
-; SI: s_bitset0_b32
-; SI: s_endpgm
 define amdgpu_kernel void @fabs_free_f64(ptr addrspace(1) %out, i64 %in) {
+; SI-LABEL: fabs_free_f64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_bitset0_b32 s3, 31
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_mov_b32 s4, s0
+; SI-NEXT:    s_mov_b32 s5, s1
+; SI-NEXT:    v_mov_b32_e32 v0, s2
+; SI-NEXT:    v_mov_b32_e32 v1, s3
+; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-NEXT:    s_endpgm
   %bc= bitcast i64 %in to double
   %fabs = call double @llvm.fabs.f64(double %bc)
   store double %fabs, ptr addrspace(1) %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}fabs_fn_free_f64:
-; SI: s_bitset0_b32
-; SI: s_endpgm
 define amdgpu_kernel void @fabs_fn_free_f64(ptr addrspace(1) %out, i64 %in) {
+; SI-LABEL: fabs_fn_free_f64:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x9
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_bitset0_b32 s3, 31
+; SI-NEXT:    s_mov_b32 s6, -1
+; SI-NEXT:    s_mov_b32 s4, s0
+; SI-NEXT:    s_mov_b32 s5, s1
+; SI-NEXT:    v_mov_b32_e32 v0, s2
+; SI-NEXT:    v_mov_b32_e32 v1, s3
+; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-NEXT:    s_endpgm
   %bc= bitcast i64 %in to double
   %fabs = call double @fabs(double %bc)
   store double %fabs, ptr addrspace(1) %out
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; FUNC: {{.*}}



More information about the llvm-commits mailing list