[llvm] 940ea5b - AMDGPU: Add some exotic truncating store tests

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 2 07:33:43 PDT 2024


Author: Matt Arsenault
Date: 2024-07-02T16:33:36+02:00
New Revision: 940ea5b8c55dbcb2b1c62027c6c18c70bdf101a9

URL: https://github.com/llvm/llvm-project/commit/940ea5b8c55dbcb2b1c62027c6c18c70bdf101a9
DIFF: https://github.com/llvm/llvm-project/commit/940ea5b8c55dbcb2b1c62027c6c18c70bdf101a9.diff

LOG: AMDGPU: Add some exotic truncating store tests

PR#97010 touches the legalization rules for 5-element vector stores,
but not all of them, so check some more cases to make sure they work.
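
All of the new tests share the same shape: truncate a vector argument and
store the narrowed result through a global pointer. For example, the
v6i32-to-v6i16 case in the diff below boils down to:

    define void @truncstore_v6i32_to_v6i16(ptr addrspace(1) %out, <6 x i32> %val) {
      ; Narrow each lane to 16 bits, then store all six halves (96 bits total).
      %trunc = trunc <6 x i32> %val to <6 x i16>
      store <6 x i16> %trunc, ptr addrspace(1) %out
      ret void
    }

The interesting part is how differently the targets legalize it: SI splits
the store into a dwordx2 plus a trailing dword, while VI repacks the halves
with v_perm_b32 and emits a single dwordx3 store, as the generated checks show.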

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/trunc-store.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/trunc-store.ll b/llvm/test/CodeGen/AMDGPU/trunc-store.ll
index e642a7099448f..931953e230bb2 100644
--- a/llvm/test/CodeGen/AMDGPU/trunc-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/trunc-store.ll
@@ -1,18 +1,422 @@
-; RUN: llc -mtriple=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=verde < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s
 
-; FUNC-LABEL: {{^}}truncstore_arg_v16i32_to_v16i8:
-; SI: buffer_store_dwordx4
 define amdgpu_kernel void @truncstore_arg_v16i32_to_v16i8(ptr addrspace(1) %out, <16 x i32> %in) {
+; SI-LABEL: truncstore_arg_v16i32_to_v16i8:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x19
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_and_b32 s18, s18, 0xff
+; SI-NEXT:    s_lshl_b32 s17, s17, 8
+; SI-NEXT:    s_and_b32 s16, s16, 0xff
+; SI-NEXT:    s_and_b32 s14, s14, 0xff
+; SI-NEXT:    s_lshl_b32 s13, s13, 8
+; SI-NEXT:    s_and_b32 s12, s12, 0xff
+; SI-NEXT:    s_and_b32 s10, s10, 0xff
+; SI-NEXT:    s_lshl_b32 s9, s9, 8
+; SI-NEXT:    s_and_b32 s8, s8, 0xff
+; SI-NEXT:    s_and_b32 s6, s6, 0xff
+; SI-NEXT:    s_lshl_b32 s5, s5, 8
+; SI-NEXT:    s_and_b32 s4, s4, 0xff
+; SI-NEXT:    s_lshl_b32 s19, s19, 24
+; SI-NEXT:    s_lshl_b32 s18, s18, 16
+; SI-NEXT:    s_or_b32 s16, s16, s17
+; SI-NEXT:    s_lshl_b32 s15, s15, 24
+; SI-NEXT:    s_lshl_b32 s14, s14, 16
+; SI-NEXT:    s_or_b32 s12, s12, s13
+; SI-NEXT:    s_lshl_b32 s11, s11, 24
+; SI-NEXT:    s_lshl_b32 s10, s10, 16
+; SI-NEXT:    s_or_b32 s8, s8, s9
+; SI-NEXT:    s_lshl_b32 s7, s7, 24
+; SI-NEXT:    s_lshl_b32 s6, s6, 16
+; SI-NEXT:    s_or_b32 s4, s4, s5
+; SI-NEXT:    s_or_b32 s18, s19, s18
+; SI-NEXT:    s_and_b32 s16, s16, 0xffff
+; SI-NEXT:    s_or_b32 s14, s15, s14
+; SI-NEXT:    s_and_b32 s12, s12, 0xffff
+; SI-NEXT:    s_or_b32 s10, s11, s10
+; SI-NEXT:    s_and_b32 s8, s8, 0xffff
+; SI-NEXT:    s_or_b32 s6, s7, s6
+; SI-NEXT:    s_and_b32 s4, s4, 0xffff
+; SI-NEXT:    s_or_b32 s16, s16, s18
+; SI-NEXT:    s_or_b32 s12, s12, s14
+; SI-NEXT:    s_or_b32 s8, s8, s10
+; SI-NEXT:    s_or_b32 s4, s4, s6
+; SI-NEXT:    v_mov_b32_e32 v0, s4
+; SI-NEXT:    v_mov_b32_e32 v1, s8
+; SI-NEXT:    v_mov_b32_e32 v2, s12
+; SI-NEXT:    v_mov_b32_e32 v3, s16
+; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: truncstore_arg_v16i32_to_v16i8:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x64
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_lshlrev_b16_e64 v0, 8, s17
+; VI-NEXT:    v_mov_b32_e32 v1, s16
+; VI-NEXT:    v_lshlrev_b16_e64 v2, 8, s19
+; VI-NEXT:    v_mov_b32_e32 v3, s18
+; VI-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v1, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b16_e64 v0, 8, s13
+; VI-NEXT:    v_mov_b32_e32 v1, s12
+; VI-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b16_e64 v1, 8, s15
+; VI-NEXT:    v_mov_b32_e32 v2, s14
+; VI-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b16_e64 v0, 8, s9
+; VI-NEXT:    v_mov_b32_e32 v1, s8
+; VI-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b16_e64 v1, 8, s11
+; VI-NEXT:    v_mov_b32_e32 v4, s10
+; VI-NEXT:    v_or_b32_sdwa v1, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b16_e64 v0, 8, s5
+; VI-NEXT:    v_mov_b32_e32 v4, s4
+; VI-NEXT:    v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b16_e64 v4, 8, s7
+; VI-NEXT:    v_mov_b32_e32 v5, s6
+; VI-NEXT:    v_or_b32_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_mov_b32_e32 v5, s1
+; VI-NEXT:    v_mov_b32_e32 v4, s0
+; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
+; VI-NEXT:    s_endpgm
   %trunc = trunc <16 x i32> %in to <16 x i8>
   store <16 x i8> %trunc, ptr addrspace(1) %out
   ret void
 }
 
-; FUNC-LABEL: {{^}}truncstore_arg_v16i64_to_v16i8:
-; SI: buffer_store_dwordx4
 define amdgpu_kernel void @truncstore_arg_v16i64_to_v16i8(ptr addrspace(1) %out, <16 x i64> %in) {
+; SI-LABEL: truncstore_arg_v16i64_to_v16i8:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dwordx16 s[16:31], s[0:1], 0x39
+; SI-NEXT:    s_load_dwordx2 s[36:37], s[0:1], 0x9
+; SI-NEXT:    s_load_dwordx16 s[0:15], s[0:1], 0x29
+; SI-NEXT:    s_mov_b32 s39, 0xf000
+; SI-NEXT:    s_mov_b32 s38, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    s_and_b32 s3, s28, 0xff
+; SI-NEXT:    s_lshl_b32 s1, s30, 24
+; SI-NEXT:    s_lshl_b32 s3, s3, 16
+; SI-NEXT:    s_or_b32 s1, s1, s3
+; SI-NEXT:    s_lshl_b32 s3, s26, 8
+; SI-NEXT:    s_and_b32 s5, s24, 0xff
+; SI-NEXT:    s_or_b32 s3, s5, s3
+; SI-NEXT:    s_and_b32 s3, s3, 0xffff
+; SI-NEXT:    s_and_b32 s5, s20, 0xff
+; SI-NEXT:    s_or_b32 s1, s3, s1
+; SI-NEXT:    s_lshl_b32 s3, s22, 24
+; SI-NEXT:    s_lshl_b32 s5, s5, 16
+; SI-NEXT:    s_or_b32 s3, s3, s5
+; SI-NEXT:    s_lshl_b32 s5, s18, 8
+; SI-NEXT:    s_and_b32 s7, s16, 0xff
+; SI-NEXT:    s_or_b32 s5, s7, s5
+; SI-NEXT:    s_and_b32 s5, s5, 0xffff
+; SI-NEXT:    s_and_b32 s7, s12, 0xff
+; SI-NEXT:    s_or_b32 s3, s5, s3
+; SI-NEXT:    s_lshl_b32 s5, s14, 24
+; SI-NEXT:    s_lshl_b32 s7, s7, 16
+; SI-NEXT:    s_or_b32 s5, s5, s7
+; SI-NEXT:    s_lshl_b32 s7, s10, 8
+; SI-NEXT:    s_and_b32 s8, s8, 0xff
+; SI-NEXT:    s_and_b32 s4, s4, 0xff
+; SI-NEXT:    s_lshl_b32 s2, s2, 8
+; SI-NEXT:    s_and_b32 s0, s0, 0xff
+; SI-NEXT:    s_or_b32 s7, s8, s7
+; SI-NEXT:    s_lshl_b32 s6, s6, 24
+; SI-NEXT:    s_lshl_b32 s4, s4, 16
+; SI-NEXT:    s_or_b32 s0, s0, s2
+; SI-NEXT:    s_and_b32 s7, s7, 0xffff
+; SI-NEXT:    s_or_b32 s4, s6, s4
+; SI-NEXT:    s_and_b32 s0, s0, 0xffff
+; SI-NEXT:    s_or_b32 s5, s7, s5
+; SI-NEXT:    s_or_b32 s0, s0, s4
+; SI-NEXT:    v_mov_b32_e32 v0, s0
+; SI-NEXT:    v_mov_b32_e32 v1, s5
+; SI-NEXT:    v_mov_b32_e32 v2, s3
+; SI-NEXT:    v_mov_b32_e32 v3, s1
+; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[36:39], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: truncstore_arg_v16i64_to_v16i8:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx16 s[16:31], s[0:1], 0xe4
+; VI-NEXT:    s_load_dwordx2 s[34:35], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx16 s[0:15], s[0:1], 0xa4
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_lshlrev_b16_e64 v0, 8, s26
+; VI-NEXT:    v_mov_b32_e32 v1, s24
+; VI-NEXT:    v_lshlrev_b16_e64 v2, 8, s30
+; VI-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_mov_b32_e32 v1, s28
+; VI-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b16_e64 v0, 8, s18
+; VI-NEXT:    v_mov_b32_e32 v1, s16
+; VI-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b16_e64 v1, 8, s22
+; VI-NEXT:    v_mov_b32_e32 v2, s20
+; VI-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b16_e64 v0, 8, s10
+; VI-NEXT:    v_mov_b32_e32 v1, s8
+; VI-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b16_e64 v1, 8, s14
+; VI-NEXT:    v_mov_b32_e32 v4, s12
+; VI-NEXT:    v_or_b32_sdwa v1, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b16_e64 v0, 8, s2
+; VI-NEXT:    v_mov_b32_e32 v4, s0
+; VI-NEXT:    v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b16_e64 v4, 8, s6
+; VI-NEXT:    v_mov_b32_e32 v5, s4
+; VI-NEXT:    v_or_b32_sdwa v4, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_mov_b32_e32 v4, s34
+; VI-NEXT:    v_mov_b32_e32 v5, s35
+; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
+; VI-NEXT:    s_endpgm
   %trunc = trunc <16 x i64> %in to <16 x i8>
   store <16 x i8> %trunc, ptr addrspace(1) %out
   ret void
 }
+
+define void @truncstore_v5i32_to_v5i1(ptr addrspace(1) %out, <5 x i32> %val) {
+; SI-LABEL: truncstore_v5i32_to_v5i1:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_and_b32_e32 v3, 1, v3
+; SI-NEXT:    v_and_b32_e32 v2, 1, v2
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    v_and_b32_e32 v3, 1, v4
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 2, v3
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    v_and_b32_e32 v3, 1, v5
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 4, v6
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    v_and_b32_e32 v2, 31, v2
+; SI-NEXT:    buffer_store_byte v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: truncstore_v5i32_to_v5i1:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_and_b32_e32 v3, 1, v3
+; VI-NEXT:    v_and_b32_e32 v2, 1, v2
+; VI-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
+; VI-NEXT:    v_or_b32_e32 v2, v2, v3
+; VI-NEXT:    v_and_b32_e32 v3, 1, v4
+; VI-NEXT:    v_lshlrev_b16_e32 v3, 2, v3
+; VI-NEXT:    v_or_b32_e32 v2, v2, v3
+; VI-NEXT:    v_and_b32_e32 v3, 1, v5
+; VI-NEXT:    v_lshlrev_b16_e32 v3, 3, v3
+; VI-NEXT:    v_or_b32_e32 v2, v2, v3
+; VI-NEXT:    v_lshlrev_b16_e32 v3, 4, v6
+; VI-NEXT:    v_or_b32_e32 v2, v2, v3
+; VI-NEXT:    v_and_b32_e32 v2, 31, v2
+; VI-NEXT:    flat_store_byte v[0:1], v2
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %trunc = trunc <5 x i32> %val to <5 x i1>
+  store <5 x i1> %trunc, ptr addrspace(1) %out
+  ret void
+}
+
+define void @truncstore_v5i32_to_v5i8(ptr addrspace(1) %out, <5 x i32> %val) {
+; SI-LABEL: truncstore_v5i32_to_v5i8:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_and_b32_e32 v4, 0xff, v4
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
+; SI-NEXT:    v_and_b32_e32 v2, 0xff, v2
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
+; SI-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    v_or_b32_e32 v4, v5, v4
+; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; SI-NEXT:    v_or_b32_e32 v2, v2, v4
+; SI-NEXT:    buffer_store_byte v6, v[0:1], s[4:7], 0 addr64 offset:4
+; SI-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: truncstore_v5i32_to_v5i8:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_lshlrev_b16_e32 v3, 8, v3
+; VI-NEXT:    v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b16_e32 v3, 8, v5
+; VI-NEXT:    v_or_b32_sdwa v3, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v4, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_add_u32_e32 v2, vcc, 4, v0
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v1, vcc
+; VI-NEXT:    flat_store_byte v[2:3], v6
+; VI-NEXT:    flat_store_dword v[0:1], v4
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %trunc = trunc <5 x i32> %val to <5 x i8>
+  store <5 x i8> %trunc, ptr addrspace(1) %out
+  ret void
+}
+
+define void @truncstore_v6i32_to_v6i1(ptr addrspace(1) %out, <6 x i32> %val) {
+; SI-LABEL: truncstore_v6i32_to_v6i1:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_and_b32_e32 v3, 1, v3
+; SI-NEXT:    v_and_b32_e32 v2, 1, v2
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 1, v3
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    v_and_b32_e32 v3, 1, v4
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 2, v3
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    v_and_b32_e32 v3, 1, v5
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 3, v3
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    v_and_b32_e32 v3, 1, v6
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 4, v3
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 5, v7
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    v_and_b32_e32 v2, 63, v2
+; SI-NEXT:    buffer_store_byte v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: truncstore_v6i32_to_v6i1:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_and_b32_e32 v3, 1, v3
+; VI-NEXT:    v_and_b32_e32 v2, 1, v2
+; VI-NEXT:    v_lshlrev_b16_e32 v3, 1, v3
+; VI-NEXT:    v_or_b32_e32 v2, v2, v3
+; VI-NEXT:    v_and_b32_e32 v3, 1, v4
+; VI-NEXT:    v_lshlrev_b16_e32 v3, 2, v3
+; VI-NEXT:    v_or_b32_e32 v2, v2, v3
+; VI-NEXT:    v_and_b32_e32 v3, 1, v5
+; VI-NEXT:    v_lshlrev_b16_e32 v3, 3, v3
+; VI-NEXT:    v_or_b32_e32 v2, v2, v3
+; VI-NEXT:    v_and_b32_e32 v3, 1, v6
+; VI-NEXT:    v_lshlrev_b16_e32 v3, 4, v3
+; VI-NEXT:    v_or_b32_e32 v2, v2, v3
+; VI-NEXT:    v_lshlrev_b16_e32 v3, 5, v7
+; VI-NEXT:    v_or_b32_e32 v2, v2, v3
+; VI-NEXT:    v_and_b32_e32 v2, 63, v2
+; VI-NEXT:    flat_store_byte v[0:1], v2
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %trunc = trunc <6 x i32> %val to <6 x i1>
+  store <6 x i1> %trunc, ptr addrspace(1) %out
+  ret void
+}
+
+define void @truncstore_v6i32_to_v6i8(ptr addrspace(1) %out, <6 x i32> %val) {
+; SI-LABEL: truncstore_v6i32_to_v6i8:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_and_b32_e32 v4, 0xff, v4
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 8, v3
+; SI-NEXT:    v_and_b32_e32 v2, 0xff, v2
+; SI-NEXT:    v_lshlrev_b32_e32 v5, 24, v5
+; SI-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    v_or_b32_e32 v4, v5, v4
+; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_or_b32_e32 v2, v2, v4
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 8, v7
+; SI-NEXT:    v_and_b32_e32 v4, 0xff, v6
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    v_or_b32_e32 v3, v4, v3
+; SI-NEXT:    buffer_store_short v3, v[0:1], s[4:7], 0 addr64 offset:4
+; SI-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: truncstore_v6i32_to_v6i8:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    v_lshlrev_b16_e32 v3, 8, v3
+; VI-NEXT:    v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b16_e32 v3, 8, v5
+; VI-NEXT:    v_or_b32_sdwa v3, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v4, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b16_e32 v2, 8, v7
+; VI-NEXT:    v_or_b32_sdwa v5, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT:    v_add_u32_e32 v2, vcc, 4, v0
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v1, vcc
+; VI-NEXT:    flat_store_short v[2:3], v5
+; VI-NEXT:    flat_store_dword v[0:1], v4
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %trunc = trunc <6 x i32> %val to <6 x i8>
+  store <6 x i8> %trunc, ptr addrspace(1) %out
+  ret void
+}
+
+define void @truncstore_v6i32_to_v6i16(ptr addrspace(1) %out, <6 x i32> %val) {
+; SI-LABEL: truncstore_v6i32_to_v6i16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
+; SI-NEXT:    v_and_b32_e32 v4, 0xffff, v4
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_or_b32_e32 v4, v4, v5
+; SI-NEXT:    v_or_b32_e32 v3, v2, v3
+; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
+; SI-NEXT:    v_and_b32_e32 v5, 0xffff, v6
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    v_or_b32_e32 v2, v5, v2
+; SI-NEXT:    buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 offset:8
+; SI-NEXT:    buffer_store_dwordx2 v[3:4], v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: truncstore_v6i32_to_v6i16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    s_mov_b32 s4, 0x1000504
+; VI-NEXT:    v_perm_b32 v6, v6, v7, s4
+; VI-NEXT:    v_perm_b32 v5, v4, v5, s4
+; VI-NEXT:    v_perm_b32 v4, v2, v3, s4
+; VI-NEXT:    flat_store_dwordx3 v[0:1], v[4:6]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    s_setpc_b64 s[30:31]
+  %trunc = trunc <6 x i32> %val to <6 x i16>
+  store <6 x i16> %trunc, ptr addrspace(1) %out
+  ret void
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GCN: {{.*}}
