[llvm] 42b9c2a - [AMDGPU] add v2i32 and v2f32 insert_vector_elt tests. NFC.
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 5 14:32:41 PDT 2021
Author: Stanislav Mekhanoshin
Date: 2021-08-05T14:28:32-07:00
New Revision: 42b9c2a17a0b63cccf3ac197a82f91b28e53e643
URL: https://github.com/llvm/llvm-project/commit/42b9c2a17a0b63cccf3ac197a82f91b28e53e643
DIFF: https://github.com/llvm/llvm-project/commit/42b9c2a17a0b63cccf3ac197a82f91b28e53e643.diff
LOG: [AMDGPU] add v2i32 and v2f32 insert_vector_elt tests. NFC.
Added:
Modified:
llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
index 30a1b31f11021..9c8733bacfe95 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -6,6 +6,129 @@
; FIXME: For some reason the 8 and 16 vectors are being stored as
; individual elements instead of 128-bit stores.
+define amdgpu_kernel void @insertelement_v2f32_0(<2 x float> addrspace(1)* %out, <2 x float> %a) nounwind { ; Kernel test: insert a float constant into element 0 of the <2 x float> argument %a and store the result to %out.
+; SI-LABEL: insertelement_v2f32_0:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s4, 0x40a00000
+; SI-NEXT: s_mov_b32 s3, 0x100f000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: v_mov_b32_e32 v1, s5
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: insertelement_v2f32_0:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s4, 0x40a00000
+; VI-NEXT: s_mov_b32 s3, 0x1100f000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: v_mov_b32_e32 v1, s5
+; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; VI-NEXT: s_endpgm
+ %vecins = insertelement <2 x float> %a, float 5.000000e+00, i32 0 ; element 0 becomes 5.0 (the expected immediate 0x40a00000 is its IEEE-754 single encoding); element 1 keeps the value from %a
+ store <2 x float> %vecins, <2 x float> addrspace(1)* %out, align 16 ; write the updated two-element vector through %out
+ ret void
+}
+
+define amdgpu_kernel void @insertelement_v2f32_1(<2 x float> addrspace(1)* %out, <2 x float> %a) nounwind { ; Kernel test: insert a float constant into element 1 of the <2 x float> argument %a and store the result to %out.
+; SI-LABEL: insertelement_v2f32_1:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s5, 0x40a00000
+; SI-NEXT: s_mov_b32 s3, 0x100f000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: v_mov_b32_e32 v1, s5
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: insertelement_v2f32_1:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_mov_b32 s5, 0x40a00000
+; VI-NEXT: s_mov_b32 s3, 0x1100f000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: v_mov_b32_e32 v1, s5
+; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; VI-NEXT: s_endpgm
+ %vecins = insertelement <2 x float> %a, float 5.000000e+00, i32 1 ; element 1 becomes 5.0 (the expected immediate 0x40a00000 is its IEEE-754 single encoding); element 0 keeps the value from %a
+ store <2 x float> %vecins, <2 x float> addrspace(1)* %out, align 16 ; write the updated two-element vector through %out
+ ret void
+}
+
+define amdgpu_kernel void @insertelement_v2i32_0(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind { ; Kernel test: insert an i32 constant into element 0 of the <2 x i32> argument %a and store the result to %out.
+; SI-LABEL: insertelement_v2i32_0:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_movk_i32 s4, 0x3e7
+; SI-NEXT: s_mov_b32 s3, 0x100f000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: v_mov_b32_e32 v1, s5
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: insertelement_v2i32_0:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_movk_i32 s4, 0x3e7
+; VI-NEXT: s_mov_b32 s3, 0x1100f000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: v_mov_b32_e32 v1, s5
+; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; VI-NEXT: s_endpgm
+ %vecins = insertelement <2 x i32> %a, i32 999, i32 0 ; element 0 becomes 999 (0x3e7 in the expected output); element 1 keeps the value from %a
+ store <2 x i32> %vecins, <2 x i32> addrspace(1)* %out, align 16 ; write the updated two-element vector through %out
+ ret void
+}
+
+define amdgpu_kernel void @insertelement_v2i32_1(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind { ; Kernel test: insert an i32 constant into element 1 of the <2 x i32> argument %a and store the result to %out.
+; SI-LABEL: insertelement_v2i32_1:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_movk_i32 s5, 0x3e7
+; SI-NEXT: s_mov_b32 s3, 0x100f000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: v_mov_b32_e32 v1, s5
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: insertelement_v2i32_1:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x8
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_movk_i32 s5, 0x3e7
+; VI-NEXT: s_mov_b32 s3, 0x1100f000
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: v_mov_b32_e32 v1, s5
+; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; VI-NEXT: s_endpgm
+ %vecins = insertelement <2 x i32> %a, i32 999, i32 1 ; element 1 becomes 999 (0x3e7 in the expected output); element 0 keeps the value from %a
+ store <2 x i32> %vecins, <2 x i32> addrspace(1)* %out, align 16 ; write the updated two-element vector through %out
+ ret void
+}
; FIXME: Why is the constant moved into the intermediate register and
; not just directly into the vector component?
@@ -1336,19 +1459,19 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s0, 0
-; SI-NEXT: s_cbranch_scc0 BB26_2
+; SI-NEXT: s_cbranch_scc0 BB30_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_load_dword s1, s[6:7], 0x1
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: s_andn2_b64 vcc, exec, s[2:3]
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
-; SI-NEXT: s_cbranch_vccz BB26_3
-; SI-NEXT: s_branch BB26_4
-; SI-NEXT: BB26_2:
-; SI-NEXT: BB26_3: ; %if
+; SI-NEXT: s_cbranch_vccz BB30_3
+; SI-NEXT: s_branch BB30_4
+; SI-NEXT: BB30_2:
+; SI-NEXT: BB30_3: ; %if
; SI-NEXT: s_load_dword s1, s[6:7], 0x0
-; SI-NEXT: BB26_4: ; %endif
+; SI-NEXT: BB30_4: ; %endif
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: s_mov_b32 s7, 0x100f000
@@ -1363,16 +1486,16 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
; VI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s0, 0
-; VI-NEXT: s_cbranch_scc0 BB26_2
+; VI-NEXT: s_cbranch_scc0 BB30_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_load_dword s1, s[6:7], 0x4
-; VI-NEXT: s_cbranch_execz BB26_3
-; VI-NEXT: s_branch BB26_4
-; VI-NEXT: BB26_2:
-; VI-NEXT: BB26_3: ; %if
+; VI-NEXT: s_cbranch_execz BB30_3
+; VI-NEXT: s_branch BB30_4
+; VI-NEXT: BB30_2:
+; VI-NEXT: BB30_3: ; %if
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s1, s[6:7], 0x0
-; VI-NEXT: BB26_4: ; %endif
+; VI-NEXT: BB30_4: ; %endif
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: s_mov_b32 s7, 0x1100f000
More information about the llvm-commits
mailing list