[llvm] 9591abd - [AMDGPU] Regenerate global-load-saddr-to-vaddr test checks

Simon Pilgrim via llvm-commits <llvm-commits at lists.llvm.org>
Sun Jul 25 06:05:45 PDT 2021


Author: Simon Pilgrim
Date: 2021-07-25T14:05:10+01:00
New Revision: 9591abd74e4d1230ac403a988a00f2eb319aca11

URL: https://github.com/llvm/llvm-project/commit/9591abd74e4d1230ac403a988a00f2eb319aca11
DIFF: https://github.com/llvm/llvm-project/commit/9591abd74e4d1230ac403a988a00f2eb319aca11.diff

LOG: [AMDGPU] Regenerate global-load-saddr-to-vaddr test checks

To simplify the diff in a future patch
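
For reference, checks like these are (re)generated by running the
update_llc_test_checks.py utility over the test file. A typical invocation
from the llvm-project root looks like the following (the --llc-binary path
is an assumption about a local build directory named "build"):

    # Rewrite the FileCheck assertions in place, using the RUN line's llc.
    llvm/utils/update_llc_test_checks.py --llc-binary=build/bin/llc \
        llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll

The script executes each RUN line and replaces the hand-written CHECK lines
with exhaustive, autogenerated GCN/GCN-NEXT assertions, which is what the
diff below shows.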

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll b/llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll
index c897d143cd5c..88f467725dc5 100644
--- a/llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
 
 ; The first load produces address in a VGPR which is used in address calculation
@@ -9,11 +10,30 @@
 ; Check that we are changing SADDR form of a load to VADDR and do not have to use
 ; readfirstlane instructions to move address from VGPRs into SGPRs.
 
-; GCN-LABEL: {{^}}test_move_load_address_to_vgpr:
-; GCN: BB{{[0-9]+}}_1:
-; GCN-NOT: v_readfirstlane_b32
-; GCN: global_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}], off glc
 define amdgpu_kernel void @test_move_load_address_to_vgpr(i32 addrspace(1)* nocapture %arg) {
+; GCN-LABEL: test_move_load_address_to_vgpr:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v2, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    global_load_dword v1, v2, s[0:1] glc
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v3, s1
+; GCN-NEXT:    v_add_u32_e32 v0, 0xffffff00, v1
+; GCN-NEXT:    v_lshlrev_b64 v[1:2], 2, v[1:2]
+; GCN-NEXT:    v_add_co_u32_e32 v1, vcc, s0, v1
+; GCN-NEXT:    v_addc_co_u32_e32 v2, vcc, v3, v2, vcc
+; GCN-NEXT:  BB0_1: ; %bb3
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    global_load_dword v3, v[1:2], off glc
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GCN-NEXT:    v_add_co_u32_e64 v1, s[0:1], 4, v1
+; GCN-NEXT:    v_addc_co_u32_e64 v2, s[0:1], 0, v2, s[0:1]
+; GCN-NEXT:    s_and_b64 vcc, exec, vcc
+; GCN-NEXT:    s_cbranch_vccz BB0_1
+; GCN-NEXT:  ; %bb.2: ; %bb2
+; GCN-NEXT:    s_endpgm
 bb:
   %i1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 0
   %i2 = load volatile i32, i32 addrspace(1)* %i1, align 4
@@ -32,10 +52,28 @@ bb3:                                              ; preds = %bb3, %bb
   br i1 %i9, label %bb2, label %bb3
 }
 
-; GCN-LABEL: {{^}}test_move_load_address_to_vgpr_d16_hi:
-; GCN-NOT: v_readfirstlane_b32
-; GCN: global_load_short_d16_hi v{{[0-9]+}}, v[{{[0-9:]+}}], off glc
 define amdgpu_kernel void @test_move_load_address_to_vgpr_d16_hi(i16 addrspace(1)* nocapture %arg) {
+; GCN-LABEL: test_move_load_address_to_vgpr_d16_hi:
+; GCN:       ; %bb.0: ; %bb
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GCN-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    global_load_ushort v0, v1, s[0:1] glc
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:  BB1_1: ; %bb3
+; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_lshlrev_b64 v[2:3], 1, v[0:1]
+; GCN-NEXT:    v_mov_b32_e32 v0, s1
+; GCN-NEXT:    v_add_co_u32_e32 v2, vcc, s0, v2
+; GCN-NEXT:    v_addc_co_u32_e32 v3, vcc, v0, v3, vcc
+; GCN-NEXT:    global_load_short_d16_hi v0, v[2:3], off glc
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0x100, v0
+; GCN-NEXT:    s_and_b64 vcc, exec, vcc
+; GCN-NEXT:    s_cbranch_vccz BB1_1
+; GCN-NEXT:  ; %bb.2: ; %bb2
+; GCN-NEXT:    s_endpgm
 bb:
   %i1 = getelementptr inbounds i16, i16 addrspace(1)* %arg, i64 0
   %load.pre = load volatile i16, i16 addrspace(1)* %i1, align 4
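
For context, the pattern these checks cover can be sketched in reduced IR
along the following lines (a hypothetical reduction, not the exact test body;
the function and value names are made up). The first volatile load yields a
value in a VGPR that feeds the address of the load inside the loop, so the
loop load cannot keep its address in SGPRs and should select the VADDR form
rather than inserting v_readfirstlane copies:

define amdgpu_kernel void @saddr_to_vaddr_sketch(i32 addrspace(1)* %arg) {
bb:
  ; This load's result lands in a VGPR and feeds the loop's address math.
  %first = load volatile i32, i32 addrspace(1)* %arg, align 4
  br label %bb3

bb3:                                              ; preds = %bb3, %bb
  %idx = phi i32 [ %first, %bb ], [ %val, %bb3 ]
  %idx.ext = zext i32 %idx to i64
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %idx.ext
  ; The address lives in VGPRs, so this should use the v[...], off form.
  %val = load volatile i32, i32 addrspace(1)* %gep, align 4
  %done = icmp eq i32 %val, 256
  br i1 %done, label %bb2, label %bb3

bb2:                                              ; preds = %bb3
  ret void
}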
