[llvm] r365754 - [AMDGPU] Regenerate idot tests. NFCI.

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 11 03:37:59 PDT 2019


Author: rksimon
Date: Thu Jul 11 03:37:58 2019
New Revision: 365754

URL: http://llvm.org/viewvc/llvm-project?rev=365754&view=rev
Log:
[AMDGPU] Regenerate idot tests. NFCI.

This reduces the size of the diff in D63281.

Modified:
    llvm/trunk/test/CodeGen/AMDGPU/idot2.ll
    llvm/trunk/test/CodeGen/AMDGPU/idot4s.ll
    llvm/trunk/test/CodeGen/AMDGPU/idot8s.ll

Modified: llvm/trunk/test/CodeGen/AMDGPU/idot2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/idot2.ll?rev=365754&r1=365753&r2=365754&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/idot2.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/idot2.ll Thu Jul 11 03:37:58 2019
@@ -2786,9 +2786,9 @@ define amdgpu_kernel void @notsdot2_sext
 ; GFX10-DL-NEXT:    global_load_ushort v2, v[2:3], off
 ; GFX10-DL-NEXT:    global_load_ushort v7, v[0:1], off
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(1)
-; GFX10-DL-NEXT:    v_and_b32_sdwa v1, v2, v4
+; GFX10-DL-NEXT:    v_and_b32_sdwa v1, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT:    v_and_b32_sdwa v3, v7, v4
+; GFX10-DL-NEXT:    v_and_b32_sdwa v3, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 ; GFX10-DL-NEXT:    v_bfe_i32 v2, v2, 0, 8
 ; GFX10-DL-NEXT:    v_bfe_i32 v0, v7, 0, 8
 ; GFX10-DL-NEXT:    v_bfe_i32 v1, v1, 0, 8

Modified: llvm/trunk/test/CodeGen/AMDGPU/idot4s.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/idot4s.ll?rev=365754&r1=365753&r2=365754&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/idot4s.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/idot4s.ll Thu Jul 11 03:37:58 2019
@@ -850,8 +850,8 @@ define amdgpu_kernel void @idot4_acc32_v
 ; GFX10-DL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX10-DL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-DL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-DL-NEXT:    v_and_b32_sdwa v3, s2, v2
-; GFX10-DL-NEXT:    v_and_b32_sdwa v2, s3, v2
+; GFX10-DL-NEXT:    v_and_b32_sdwa v3, s2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+; GFX10-DL-NEXT:    v_and_b32_sdwa v2, s3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 ; GFX10-DL-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX10-DL-NEXT:    s_sext_i32_i8 s0, s2
 ; GFX10-DL-NEXT:    s_sext_i32_i8 s1, s3
@@ -1070,16 +1070,16 @@ define amdgpu_kernel void @idot4_acc16_v
 ; GFX10-DL-NEXT:    s_bfe_i32 s1, s3, 0x80000
 ; GFX10-DL-NEXT:    s_lshr_b32 s4, s2, 16
 ; GFX10-DL-NEXT:    s_lshr_b32 s5, s3, 16
-; GFX10-DL-NEXT:    v_and_b32_sdwa v4, sext(s2), v2
+; GFX10-DL-NEXT:    v_and_b32_sdwa v4, sext(s2), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 ; GFX10-DL-NEXT:    v_and_b32_e32 v7, s0, v2
 ; GFX10-DL-NEXT:    v_and_b32_e32 v6, s1, v2
-; GFX10-DL-NEXT:    v_and_b32_sdwa v5, sext(s3), v2
+; GFX10-DL-NEXT:    v_and_b32_sdwa v5, sext(s3), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 ; GFX10-DL-NEXT:    s_bfe_i32 s0, s4, 0x80000
 ; GFX10-DL-NEXT:    s_bfe_i32 s1, s5, 0x80000
 ; GFX10-DL-NEXT:    v_lshl_or_b32 v4, v4, 16, v7
-; GFX10-DL-NEXT:    v_and_b32_sdwa v8, sext(s4), v2
+; GFX10-DL-NEXT:    v_and_b32_sdwa v8, sext(s4), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 ; GFX10-DL-NEXT:    v_lshl_or_b32 v5, v5, 16, v6
-; GFX10-DL-NEXT:    v_and_b32_sdwa v6, sext(s5), v2
+; GFX10-DL-NEXT:    v_and_b32_sdwa v6, sext(s5), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 ; GFX10-DL-NEXT:    v_and_b32_e32 v7, s1, v2
 ; GFX10-DL-NEXT:    v_and_b32_e32 v2, s0, v2
 ; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v4, v4, v5

Modified: llvm/trunk/test/CodeGen/AMDGPU/idot8s.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/idot8s.ll?rev=365754&r1=365753&r2=365754&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/idot8s.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/idot8s.ll Thu Jul 11 03:37:58 2019
@@ -816,54 +816,54 @@ define amdgpu_kernel void @idot8_acc8(<8
 ;
 ; GFX10-DL-LABEL: idot8_acc8:
 ; GFX10-DL:       ; %bb.0: ; %entry
-; GFX10-DL-NEXT:   s_load_dwordx4 s[4:7], s[0:1], 0x24
-; GFX10-DL-NEXT:   s_load_dwordx2 s[0:1], s[0:1], 0x34
-; GFX10-DL-NEXT:   v_mov_b32_e32 v2, 0xffff
-; GFX10-DL-NEXT:   s_movk_i32 s2, 0xff
-; GFX10-DL-NEXT:   ; implicit-def: $vcc_hi
-; GFX10-DL-NEXT:   s_waitcnt lgkmcnt(0)
-; GFX10-DL-NEXT:   s_load_dword s4, s[4:5], 0x0
-; GFX10-DL-NEXT:   s_load_dword s5, s[6:7], 0x0
-; GFX10-DL-NEXT:   v_mov_b32_e32 v0, s0
-; GFX10-DL-NEXT:   v_mov_b32_e32 v1, s1
-; GFX10-DL-NEXT:   global_load_ubyte v3, v[0:1], off
-; GFX10-DL-NEXT:   s_waitcnt lgkmcnt(0)
-; GFX10-DL-NEXT:   s_lshr_b32 s0, s4, 12
-; GFX10-DL-NEXT:   s_lshr_b32 s1, s5, 12
-; GFX10-DL-NEXT:   s_bfe_i32 s6, s4, 0x40000
-; GFX10-DL-NEXT:   s_bfe_i32 s7, s5, 0x40000
-; GFX10-DL-NEXT:   s_bfe_i32 s8, s4, 0x40004
-; GFX10-DL-NEXT:   v_lshlrev_b16_e64 v4, 12, s0
-; GFX10-DL-NEXT:   v_lshlrev_b16_e64 v5, 12, s1
-; GFX10-DL-NEXT:   s_bfe_i32 s0, s5, 0x40004
-; GFX10-DL-NEXT:   s_bfe_i32 s1, s4, 0x40008
-; GFX10-DL-NEXT:   s_bfe_i32 s9, s5, 0x40008
-; GFX10-DL-NEXT:   v_and_b32_e32 v4, v4, v2
-; GFX10-DL-NEXT:   v_and_b32_e32 v2, v5, v2
-; GFX10-DL-NEXT:   s_bfe_i32 s10, s4, 0x40010
-; GFX10-DL-NEXT:   s_bfe_i32 s11, s5, 0x40010
-; GFX10-DL-NEXT:   v_mul_i32_i24_e64 v5, s1, s9
-; GFX10-DL-NEXT:   v_ashrrev_i16_e64 v4, 12, v4
-; GFX10-DL-NEXT:   v_ashrrev_i16_e64 v2, 12, v2
-; GFX10-DL-NEXT:   s_bfe_i32 s1, s4, 0x40014
-; GFX10-DL-NEXT:   s_bfe_i32 s9, s5, 0x40014
-; GFX10-DL-NEXT:   s_bfe_i32 s12, s4, 0x40018
-; GFX10-DL-NEXT:   v_and_b32_sdwa v4, v4, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX10-DL-NEXT:   v_and_b32_sdwa v2, v2, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX10-DL-NEXT:   s_bfe_i32 s2, s5, 0x40018
-; GFX10-DL-NEXT:   s_ashr_i32 s4, s4, 28
-; GFX10-DL-NEXT:   s_ashr_i32 s5, s5, 28
-; GFX10-DL-NEXT:   s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT:   v_mad_i32_i24 v3, s6, s7, v3
-; GFX10-DL-NEXT:   v_mad_i32_i24 v3, s8, s0, v3
-; GFX10-DL-NEXT:   v_add_nc_u32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
-; GFX10-DL-NEXT:   v_mad_u32_u24 v2, v4, v2, v3
-; GFX10-DL-NEXT:   v_mad_i32_i24 v2, s10, s11, v2
-; GFX10-DL-NEXT:   v_mad_i32_i24 v2, s1, s9, v2
-; GFX10-DL-NEXT:   v_mad_i32_i24 v2, s12, s2, v2
-; GFX10-DL-NEXT:   v_mad_i32_i24 v2, s4, s5, v2
-; GFX10-DL-NEXT:   global_store_byte v[0:1], v2, off
-; GFX10-DL-NEXT:   s_endpgm
+; GFX10-DL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX10-DL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX10-DL-NEXT:    v_mov_b32_e32 v2, 0xffff
+; GFX10-DL-NEXT:    s_movk_i32 s2, 0xff
+; GFX10-DL-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-DL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-DL-NEXT:    s_load_dword s4, s[4:5], 0x0
+; GFX10-DL-NEXT:    s_load_dword s5, s[6:7], 0x0
+; GFX10-DL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-DL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-DL-NEXT:    global_load_ubyte v3, v[0:1], off
+; GFX10-DL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-DL-NEXT:    s_lshr_b32 s0, s4, 12
+; GFX10-DL-NEXT:    s_lshr_b32 s1, s5, 12
+; GFX10-DL-NEXT:    s_bfe_i32 s6, s4, 0x40000
+; GFX10-DL-NEXT:    s_bfe_i32 s7, s5, 0x40000
+; GFX10-DL-NEXT:    s_bfe_i32 s8, s4, 0x40004
+; GFX10-DL-NEXT:    v_lshlrev_b16_e64 v4, 12, s0
+; GFX10-DL-NEXT:    v_lshlrev_b16_e64 v5, 12, s1
+; GFX10-DL-NEXT:    s_bfe_i32 s0, s5, 0x40004
+; GFX10-DL-NEXT:    s_bfe_i32 s1, s4, 0x40008
+; GFX10-DL-NEXT:    s_bfe_i32 s9, s5, 0x40008
+; GFX10-DL-NEXT:    v_and_b32_e32 v4, v4, v2
+; GFX10-DL-NEXT:    v_and_b32_e32 v2, v5, v2
+; GFX10-DL-NEXT:    s_bfe_i32 s10, s4, 0x40010
+; GFX10-DL-NEXT:    s_bfe_i32 s11, s5, 0x40010
+; GFX10-DL-NEXT:    v_mul_i32_i24_e64 v5, s1, s9
+; GFX10-DL-NEXT:    v_ashrrev_i16_e64 v4, 12, v4
+; GFX10-DL-NEXT:    v_ashrrev_i16_e64 v2, 12, v2
+; GFX10-DL-NEXT:    s_bfe_i32 s1, s4, 0x40014
+; GFX10-DL-NEXT:    s_bfe_i32 s9, s5, 0x40014
+; GFX10-DL-NEXT:    s_bfe_i32 s12, s4, 0x40018
+; GFX10-DL-NEXT:    v_and_b32_sdwa v4, v4, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX10-DL-NEXT:    v_and_b32_sdwa v2, v2, s2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX10-DL-NEXT:    s_bfe_i32 s2, s5, 0x40018
+; GFX10-DL-NEXT:    s_ashr_i32 s4, s4, 28
+; GFX10-DL-NEXT:    s_ashr_i32 s5, s5, 28
+; GFX10-DL-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-DL-NEXT:    v_mad_i32_i24 v3, s6, s7, v3
+; GFX10-DL-NEXT:    v_mad_i32_i24 v3, s8, s0, v3
+; GFX10-DL-NEXT:    v_add_nc_u32_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
+; GFX10-DL-NEXT:    v_mad_u32_u24 v2, v4, v2, v3
+; GFX10-DL-NEXT:    v_mad_i32_i24 v2, s10, s11, v2
+; GFX10-DL-NEXT:    v_mad_i32_i24 v2, s1, s9, v2
+; GFX10-DL-NEXT:    v_mad_i32_i24 v2, s12, s2, v2
+; GFX10-DL-NEXT:    v_mad_i32_i24 v2, s4, s5, v2
+; GFX10-DL-NEXT:    global_store_byte v[0:1], v2, off
+; GFX10-DL-NEXT:    s_endpgm
                                        <8 x i4> addrspace(1)* %src2,
                                        i8 addrspace(1)* nocapture %dst) {
 entry:




More information about the llvm-commits mailing list