[llvm] Add insert/extract test to test the legal form of instruction V_CNDMASK_B32 (PR #79384)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 24 14:50:59 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: choikwa (choikwa)

<details>
<summary>Changes</summary>

Add an insert/extract element test to test the legal form of the instruction V_CNDMASK_B32.



---
Full diff: https://github.com/llvm/llvm-project/pull/79384.diff


1 Files Affected:

- (added) llvm/test/CodeGen/AMDGPU/insert_extract_element.ll (+87) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/insert_extract_element.ll b/llvm/test/CodeGen/AMDGPU/insert_extract_element.ll
new file mode 100644
index 000000000000000..16f499f060ac5e5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert_extract_element.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7"
+target triple = "amdgcn-amd-amdhsa"
+
+define amdgpu_kernel void @_Z8Kernel3DI3APE11GaugeAPEArgEvT0_(i32 %inc.i.i, i32 %dr.037.i.i) #0 {
+; GFX90A-LABEL: _Z8Kernel3DI3APE11GaugeAPEArgEvT0_:
+; GFX90A:       ; %bb.0: ; %entry
+; GFX90A-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
+; GFX90A-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
+; GFX90A-NEXT:    s_add_u32 s0, s0, s15
+; GFX90A-NEXT:    s_addc_u32 s1, s1, 0
+; GFX90A-NEXT:    s_mov_b64 s[10:11], s[8:9]
+; GFX90A-NEXT:    s_add_u32 s8, s6, 8
+; GFX90A-NEXT:    s_addc_u32 s9, s7, 0
+; GFX90A-NEXT:    s_load_dwordx2 s[34:35], s[6:7], 0x0
+; GFX90A-NEXT:    s_getpc_b64 s[6:7]
+; GFX90A-NEXT:    s_add_u32 s6, s6, _ZN3__XcviEv@gotpcrel32@lo+4
+; GFX90A-NEXT:    s_addc_u32 s7, s7, _ZN3__XcviEv@gotpcrel32@hi+12
+; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x0
+; GFX90A-NEXT:    v_mov_b32_e32 v31, v0
+; GFX90A-NEXT:    s_mov_b32 s32, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_swappc_b64 s[30:31], s[6:7]
+; GFX90A-NEXT:    s_mov_b32 s4, 0
+; GFX90A-NEXT:    s_mov_b32 s5, s35
+; GFX90A-NEXT:    s_and_b64 vcc, exec, -1
+; GFX90A-NEXT:    s_mov_b32 s6, 0
+; GFX90A-NEXT:    s_mov_b32 s7, 0
+; GFX90A-NEXT:    s_mov_b32 s8, 0
+; GFX90A-NEXT:  .LBB0_1: ; %for.body.i.i
+; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 1
+; GFX90A-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[10:11], s[10:11], exec
+; GFX90A-NEXT:    s_cselect_b32 s9, s6, s4
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 2
+; GFX90A-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[10:11], s[10:11], exec
+; GFX90A-NEXT:    s_cselect_b32 s9, s7, s9
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 3
+; GFX90A-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[10:11], s[10:11], exec
+; GFX90A-NEXT:    s_cselect_b32 s9, s8, s9
+; GFX90A-NEXT:    s_or_b32 s9, s9, s34
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 1
+; GFX90A-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[12:13], s[10:11], exec
+; GFX90A-NEXT:    s_cselect_b32 s6, s9, s6
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 3
+; GFX90A-NEXT:    s_cselect_b64 s[12:13], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[14:15], s[12:13], exec
+; GFX90A-NEXT:    s_cselect_b32 s8, s9, s8
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 2
+; GFX90A-NEXT:    s_cselect_b64 s[14:15], -1, 0
+; GFX90A-NEXT:    s_and_b64 s[16:17], s[14:15], exec
+; GFX90A-NEXT:    s_cselect_b32 s7, s9, s7
+; GFX90A-NEXT:    s_cmp_eq_u32 s5, 0
+; GFX90A-NEXT:    s_cselect_b32 s4, s9, s4
+; GFX90A-NEXT:    s_or_b64 s[10:11], s[14:15], s[10:11]
+; GFX90A-NEXT:    s_or_b64 s[10:11], s[12:13], s[10:11]
+; GFX90A-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[10:11]
+; GFX90A-NEXT:    s_mov_b64 vcc, vcc
+; GFX90A-NEXT:    s_cbranch_vccnz .LBB0_1
+; GFX90A-NEXT:  ; %bb.2: ; %DummyReturnBlock
+; GFX90A-NEXT:    s_endpgm
+entry:
+  %call.i = call i32 @_ZN3__XcviEv()
+  %0 = insertelement <4 x i32> zeroinitializer, i32 %call.i, i64 0
+  br label %for.body.i.i
+
+for.body.i.i:                                     ; preds = %for.body.i.i, %entry
+  %x.sroa.0.036.i.i = phi <4 x i32> [ %0, %entry ], [ %4, %for.body.i.i ]
+  %X.sroa.0.035.i.i = phi <4 x i32> [ zeroinitializer, %entry ], [ %2, %for.body.i.i ]
+  %idxprom.i.i = zext i32 %dr.037.i.i to i64
+  %1 = extractelement <4 x i32> %X.sroa.0.035.i.i, i64 %idxprom.i.i
+  %add.i.i = or i32 %1, %inc.i.i
+  %2 = insertelement <4 x i32> %X.sroa.0.035.i.i, i32 %add.i.i, i64 %idxprom.i.i
+  %3 = extractelement <4 x i32> %x.sroa.0.036.i.i, i64 %idxprom.i.i
+  %4 = insertelement <4 x i32> %x.sroa.0.036.i.i, i32 %3, i64 0
+  br label %for.body.i.i
+}
+
+declare i32 @_ZN3__XcviEv()
+
+attributes #0 = { "target-cpu"="gfx90a" }

``````````

</details>


https://github.com/llvm/llvm-project/pull/79384


More information about the llvm-commits mailing list