[llvm] Add insert/extract test to test the legal form of instruction V_CNDMA… (PR #79384)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 24 16:00:54 PST 2024


================
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
+target triple = "amdgcn-amd-amdhsa"
+
+define amdgpu_kernel void @_Z8Kernel3DI3APE11GaugeAPEArgEvT0_(i32 %inc.i.i, i32 %dr.037.i.i) #0 {
+; GFX90A-LABEL: _Z8Kernel3DI3APE11GaugeAPEArgEvT0_:
+; GFX90A:       ; %bb.0: ; %entry
+; GFX90A-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
+; GFX90A-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
+; GFX90A-NEXT:    s_add_u32 s0, s0, s15
+; GFX90A-NEXT:    s_addc_u32 s1, s1, 0
+; GFX90A-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; GFX90A-NEXT:    s_add_u32 s8, s6, 8
+; GFX90A-NEXT:    s_addc_u32 s9, s7, 0
+; GFX90A-NEXT:    s_load_dwordx2 s[34:35], s[6:7], 0x0
+; GFX90A-NEXT:    s_getpc_b64 s[6:7]
+; GFX90A-NEXT:    s_add_u32 s6, s6, _ZN3__XcviEv@gotpcrel32@lo+4
+; GFX90A-NEXT:    s_addc_u32 s7, s7, _ZN3__XcviEv@gotpcrel32@hi+12
+; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
+; GFX90A-NEXT:    s_mov_b32 s32, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX90A-NEXT:    s_mov_b32 s4, 0
+; GFX90A-NEXT:    s_mov_b32 s5, s35
+; GFX90A-NEXT:    s_and_b64 vcc, exec, -1
+; GFX90A-NEXT:    s_mov_b32 s6, 0
+; GFX90A-NEXT:    s_mov_b32 s7, 0
+; GFX90A-NEXT:    s_mov_b32 s8, 0
+; GFX90A-NEXT:  .LBB0_1: ; %for.body.i.i
+; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 1
+; GFX90A-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[10:11], s[10:11], exec
+; GFX90A-NEXT:    s_cselect_b32 s9, s6, s4
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 2
+; GFX90A-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[10:11], s[10:11], exec
+; GFX90A-NEXT:    s_cselect_b32 s9, s7, s9
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 3
+; GFX90A-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[10:11], s[10:11], exec
+; GFX90A-NEXT:    s_cselect_b32 s9, s8, s9
+; GFX90A-NEXT:    s_or_b32 s9, s9, s34
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 1
+; GFX90A-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[12:13], s[10:11], exec
+; GFX90A-NEXT:    s_cselect_b32 s6, s9, s6
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 3
+; GFX90A-NEXT:    s_cselect_b64 s[12:13], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[14:15], s[12:13], exec
+; GFX90A-NEXT:    s_cselect_b32 s8, s9, s8
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 2
+; GFX90A-NEXT:    s_cselect_b64 s[14:15], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[16:17], s[14:15], exec
+; GFX90A-NEXT:    s_cselect_b32 s7, s9, s7
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 0
+; GFX90A-NEXT:    s_cselect_b32 s4, s9, s4
+; GFX90A-NEXT:    s_or_b64 s[10:11], s[14:15], s[10:11]
+; GFX90A-NEXT:    s_or_b64 s[10:11], s[12:13], s[10:11]
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[10:11]
+; GFX90A-NEXT:    s_mov_b64 vcc, vcc
+; GFX90A-NEXT:    s_cbranch_vccnz .LBB0_1
+; GFX90A-NEXT:  ; %bb.2: ; %DummyReturnBlock
+; GFX90A-NEXT:    s_endpgm
+entry:
+  %call.i = call i32 @_ZN3__XcviEv()
----------------
jrbyrnes wrote:

Can you remove the call?

https://github.com/llvm/llvm-project/pull/79384


More information about the llvm-commits mailing list