[llvm] [AMDGPU] Add insert/extract test to test instruction V_CNDMASK_B32 (PR #79384)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 24 19:25:23 PST 2024


https://github.com/choikwa updated https://github.com/llvm/llvm-project/pull/79384

>From 816f14d94166958e9ec668cc647bec9b1be277dd Mon Sep 17 00:00:00 2001
From: Kevin Choi <kevin.choi at amd.com>
Date: Wed, 24 Jan 2024 16:35:06 -0600
Subject: [PATCH] [AMDGPU] Add insert/extract test to test instruction
 V_CNDMASK_B32

---
 .../CodeGen/AMDGPU/insert_extract_element.ll  | 218 ++++++++++++++++++
 1 file changed, 218 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/insert_extract_element.ll

diff --git a/llvm/test/CodeGen/AMDGPU/insert_extract_element.ll b/llvm/test/CodeGen/AMDGPU/insert_extract_element.ll
new file mode 100644
index 000000000000000..2e827ede9b8073f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert_extract_element.ll
@@ -0,0 +1,218 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90a %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940 %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s
+
+define amdgpu_kernel void @test_insert_extract(i32 %inc.i.i, i32 %dr.037.i.i) {
+; GFX90a-LABEL: test_insert_extract:
+; GFX90a:       ; %bb.0: ; %entry
+; GFX90a-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX90a-NEXT:    s_mov_b32 s2, 0
+; GFX90a-NEXT:    s_and_b64 vcc, exec, -1
+; GFX90a-NEXT:    s_mov_b32 s3, 0
+; GFX90a-NEXT:    s_mov_b32 s4, 0
+; GFX90a-NEXT:    s_mov_b32 s5, 0
+; GFX90a-NEXT:    s_mov_b32 s6, 0
+; GFX90a-NEXT:  .LBB0_1: ; %for.body.i.i
+; GFX90a-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90a-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90a-NEXT:    s_cmp_eq_u32 s1, 1
+; GFX90a-NEXT:    s_cselect_b64 s[8:9], -1, 0
+; GFX90a-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX90a-NEXT:    s_cselect_b32 s7, s4, s3
+; GFX90a-NEXT:    s_cmp_eq_u32 s1, 2
+; GFX90a-NEXT:    s_cselect_b64 s[8:9], -1, 0
+; GFX90a-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX90a-NEXT:    s_cselect_b32 s7, s5, s7
+; GFX90a-NEXT:    s_cmp_eq_u32 s1, 3
+; GFX90a-NEXT:    s_cselect_b64 s[8:9], -1, 0
+; GFX90a-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX90a-NEXT:    s_cselect_b32 s7, s6, s7
+; GFX90a-NEXT:    s_or_b32 s7, s7, s0
+; GFX90a-NEXT:    s_cmp_eq_u32 s1, 1
+; GFX90a-NEXT:    s_cselect_b64 s[8:9], -1, 0
+; GFX90a-NEXT:    s_and_b64 s[10:11], s[8:9], exec
+; GFX90a-NEXT:    s_cselect_b32 s4, s7, s4
+; GFX90a-NEXT:    s_cmp_eq_u32 s1, 3
+; GFX90a-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; GFX90a-NEXT:    s_and_b64 s[12:13], s[10:11], exec
+; GFX90a-NEXT:    s_cselect_b32 s6, s7, s6
+; GFX90a-NEXT:    s_cmp_eq_u32 s1, 2
+; GFX90a-NEXT:    s_cselect_b64 s[12:13], -1, 0
+; GFX90a-NEXT:    s_and_b64 s[14:15], s[12:13], exec
+; GFX90a-NEXT:    s_cselect_b32 s5, s7, s5
+; GFX90a-NEXT:    s_cmp_eq_u32 s1, 0
+; GFX90a-NEXT:    s_cselect_b32 s3, s7, s3
+; GFX90a-NEXT:    s_or_b64 s[8:9], s[12:13], s[8:9]
+; GFX90a-NEXT:    s_or_b64 s[8:9], s[10:11], s[8:9]
+; GFX90a-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX90a-NEXT:    s_cselect_b32 s2, 0, s2
+; GFX90a-NEXT:    s_mov_b64 vcc, vcc
+; GFX90a-NEXT:    s_cbranch_vccnz .LBB0_1
+; GFX90a-NEXT:  ; %bb.2: ; %DummyReturnBlock
+; GFX90a-NEXT:    s_endpgm
+;
+; GFX940-LABEL: test_insert_extract:
+; GFX940:       ; %bb.0: ; %entry
+; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX940-NEXT:    s_mov_b32 s2, 0
+; GFX940-NEXT:    s_and_b64 vcc, exec, -1
+; GFX940-NEXT:    s_mov_b32 s3, 0
+; GFX940-NEXT:    s_mov_b32 s4, 0
+; GFX940-NEXT:    s_mov_b32 s5, 0
+; GFX940-NEXT:    s_mov_b32 s6, 0
+; GFX940-NEXT:  .LBB0_1: ; %for.body.i.i
+; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-NEXT:    s_cmp_eq_u32 s1, 1
+; GFX940-NEXT:    s_cselect_b64 s[8:9], -1, 0
+; GFX940-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX940-NEXT:    s_cselect_b32 s7, s4, s3
+; GFX940-NEXT:    s_cmp_eq_u32 s1, 2
+; GFX940-NEXT:    s_cselect_b64 s[8:9], -1, 0
+; GFX940-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX940-NEXT:    s_cselect_b32 s7, s5, s7
+; GFX940-NEXT:    s_cmp_eq_u32 s1, 3
+; GFX940-NEXT:    s_cselect_b64 s[8:9], -1, 0
+; GFX940-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX940-NEXT:    s_cselect_b32 s7, s6, s7
+; GFX940-NEXT:    s_or_b32 s7, s7, s0
+; GFX940-NEXT:    s_cmp_eq_u32 s1, 1
+; GFX940-NEXT:    s_cselect_b64 s[8:9], -1, 0
+; GFX940-NEXT:    s_and_b64 s[10:11], s[8:9], exec
+; GFX940-NEXT:    s_cselect_b32 s4, s7, s4
+; GFX940-NEXT:    s_cmp_eq_u32 s1, 3
+; GFX940-NEXT:    s_cselect_b64 s[10:11], -1, 0
+; GFX940-NEXT:    s_and_b64 s[12:13], s[10:11], exec
+; GFX940-NEXT:    s_cselect_b32 s6, s7, s6
+; GFX940-NEXT:    s_cmp_eq_u32 s1, 2
+; GFX940-NEXT:    s_cselect_b64 s[12:13], -1, 0
+; GFX940-NEXT:    s_and_b64 s[14:15], s[12:13], exec
+; GFX940-NEXT:    s_cselect_b32 s5, s7, s5
+; GFX940-NEXT:    s_cmp_eq_u32 s1, 0
+; GFX940-NEXT:    s_cselect_b32 s3, s7, s3
+; GFX940-NEXT:    s_or_b64 s[8:9], s[12:13], s[8:9]
+; GFX940-NEXT:    s_or_b64 s[8:9], s[10:11], s[8:9]
+; GFX940-NEXT:    s_and_b64 s[8:9], s[8:9], exec
+; GFX940-NEXT:    s_cselect_b32 s2, 0, s2
+; GFX940-NEXT:    s_mov_b64 vcc, vcc
+; GFX940-NEXT:    s_cbranch_vccnz .LBB0_1
+; GFX940-NEXT:  ; %bb.2: ; %DummyReturnBlock
+; GFX940-NEXT:    s_endpgm
+;
+; GFX1030-LABEL: test_insert_extract:
+; GFX1030:       ; %bb.0: ; %entry
+; GFX1030-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX1030-NEXT:    s_mov_b32 s2, 0
+; GFX1030-NEXT:    s_mov_b32 s3, 0
+; GFX1030-NEXT:    s_mov_b32 s4, 0
+; GFX1030-NEXT:    s_mov_b32 s5, 0
+; GFX1030-NEXT:    s_mov_b32 s6, 0
+; GFX1030-NEXT:    s_mov_b32 vcc_lo, exec_lo
+; GFX1030-NEXT:    .p2align 6
+; GFX1030-NEXT:  .LBB0_1: ; %for.body.i.i
+; GFX1030-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX1030-NEXT:    s_cmp_eq_u32 s1, 1
+; GFX1030-NEXT:    s_cselect_b32 s7, -1, 0
+; GFX1030-NEXT:    s_and_b32 s7, s7, exec_lo
+; GFX1030-NEXT:    s_cselect_b32 s7, s4, s3
+; GFX1030-NEXT:    s_cmp_eq_u32 s1, 2
+; GFX1030-NEXT:    s_cselect_b32 s8, -1, 0
+; GFX1030-NEXT:    s_and_b32 s8, s8, exec_lo
+; GFX1030-NEXT:    s_cselect_b32 s7, s5, s7
+; GFX1030-NEXT:    s_cmp_eq_u32 s1, 3
+; GFX1030-NEXT:    s_cselect_b32 s8, -1, 0
+; GFX1030-NEXT:    s_and_b32 s8, s8, exec_lo
+; GFX1030-NEXT:    s_cselect_b32 s7, s6, s7
+; GFX1030-NEXT:    s_or_b32 s7, s7, s0
+; GFX1030-NEXT:    s_cmp_eq_u32 s1, 1
+; GFX1030-NEXT:    s_cselect_b32 s8, -1, 0
+; GFX1030-NEXT:    s_and_b32 s9, s8, exec_lo
+; GFX1030-NEXT:    s_cselect_b32 s4, s7, s4
+; GFX1030-NEXT:    s_cmp_eq_u32 s1, 3
+; GFX1030-NEXT:    s_cselect_b32 s9, -1, 0
+; GFX1030-NEXT:    s_and_b32 s10, s9, exec_lo
+; GFX1030-NEXT:    s_cselect_b32 s6, s7, s6
+; GFX1030-NEXT:    s_cmp_eq_u32 s1, 2
+; GFX1030-NEXT:    s_cselect_b32 s10, -1, 0
+; GFX1030-NEXT:    s_and_b32 s11, s10, exec_lo
+; GFX1030-NEXT:    s_cselect_b32 s5, s7, s5
+; GFX1030-NEXT:    s_cmp_eq_u32 s1, 0
+; GFX1030-NEXT:    s_cselect_b32 s3, s7, s3
+; GFX1030-NEXT:    s_or_b32 s7, s10, s8
+; GFX1030-NEXT:    s_or_b32 s7, s9, s7
+; GFX1030-NEXT:    s_and_b32 s7, s7, exec_lo
+; GFX1030-NEXT:    s_cselect_b32 s2, 0, s2
+; GFX1030-NEXT:    s_cbranch_vccnz .LBB0_1
+; GFX1030-NEXT:  ; %bb.2: ; %DummyReturnBlock
+; GFX1030-NEXT:    s_endpgm
+;
+; GFX1100-LABEL: test_insert_extract:
+; GFX1100:       ; %bb.0: ; %entry
+; GFX1100-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
+; GFX1100-NEXT:    s_mov_b32 s2, 0
+; GFX1100-NEXT:    s_mov_b32 s3, 0
+; GFX1100-NEXT:    s_mov_b32 s4, 0
+; GFX1100-NEXT:    s_mov_b32 s5, 0
+; GFX1100-NEXT:    s_mov_b32 s6, 0
+; GFX1100-NEXT:    s_mov_b32 vcc_lo, exec_lo
+; GFX1100-NEXT:    .p2align 6
+; GFX1100-NEXT:  .LBB0_1: ; %for.body.i.i
+; GFX1100-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX1100-NEXT:    s_cmp_eq_u32 s1, 1
+; GFX1100-NEXT:    s_cselect_b32 s7, -1, 0
+; GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
+; GFX1100-NEXT:    s_and_b32 s7, s7, exec_lo
+; GFX1100-NEXT:    s_cselect_b32 s7, s4, s3
+; GFX1100-NEXT:    s_cmp_eq_u32 s1, 2
+; GFX1100-NEXT:    s_cselect_b32 s8, -1, 0
+; GFX1100-NEXT:    s_and_b32 s8, s8, exec_lo
+; GFX1100-NEXT:    s_cselect_b32 s7, s5, s7
+; GFX1100-NEXT:    s_cmp_eq_u32 s1, 3
+; GFX1100-NEXT:    s_cselect_b32 s8, -1, 0
+; GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1100-NEXT:    s_and_b32 s8, s8, exec_lo
+; GFX1100-NEXT:    s_cselect_b32 s7, s6, s7
+; GFX1100-NEXT:    s_or_b32 s7, s7, s0
+; GFX1100-NEXT:    s_cmp_eq_u32 s1, 1
+; GFX1100-NEXT:    s_cselect_b32 s8, -1, 0
+; GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
+; GFX1100-NEXT:    s_and_b32 s9, s8, exec_lo
+; GFX1100-NEXT:    s_cselect_b32 s4, s7, s4
+; GFX1100-NEXT:    s_cmp_eq_u32 s1, 3
+; GFX1100-NEXT:    s_cselect_b32 s9, -1, 0
+; GFX1100-NEXT:    s_and_b32 s10, s9, exec_lo
+; GFX1100-NEXT:    s_cselect_b32 s6, s7, s6
+; GFX1100-NEXT:    s_cmp_eq_u32 s1, 2
+; GFX1100-NEXT:    s_cselect_b32 s10, -1, 0
+; GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1100-NEXT:    s_and_b32 s11, s10, exec_lo
+; GFX1100-NEXT:    s_cselect_b32 s5, s7, s5
+; GFX1100-NEXT:    s_cmp_eq_u32 s1, 0
+; GFX1100-NEXT:    s_cselect_b32 s3, s7, s3
+; GFX1100-NEXT:    s_or_b32 s7, s10, s8
+; GFX1100-NEXT:    s_or_b32 s7, s9, s7
+; GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1100-NEXT:    s_and_b32 s7, s7, exec_lo
+; GFX1100-NEXT:    s_cselect_b32 s2, 0, s2
+; GFX1100-NEXT:    s_cbranch_vccnz .LBB0_1
+; GFX1100-NEXT:  ; %bb.2: ; %DummyReturnBlock
+; GFX1100-NEXT:    s_endpgm
+entry: ; builds the initial vector value, then branches into the loop
+  %0 = insertelement <4 x i32> zeroinitializer, i32 0, i64 0 ; writes 0 into lane 0 of a zero vector, so %0 is still all-zero
+  br label %for.body.i.i
+
+for.body.i.i:                                     ; preds = %for.body.i.i, %entry
+  %x.sroa.0.036.i.i = phi <4 x i32> [ %0, %entry ], [ %4, %for.body.i.i ] ; loop-carried vector, updated each iteration by %4
+  %X.sroa.0.035.i.i = phi <4 x i32> [ zeroinitializer, %entry ], [ %2, %for.body.i.i ] ; loop-carried vector, updated each iteration by %2
+  %idxprom.i.i = zext i32 %dr.037.i.i to i64 ; dynamic lane index, taken from the second kernel argument
+  %1 = extractelement <4 x i32> %X.sroa.0.035.i.i, i64 %idxprom.i.i ; read the dynamically indexed lane of %X.sroa.0.035.i.i
+  %add.i.i = or i32 %1, %inc.i.i ; OR that lane with the first kernel argument
+  %2 = insertelement <4 x i32> %X.sroa.0.035.i.i, i32 %add.i.i, i64 %idxprom.i.i ; write the result back into the same dynamic lane
+  %3 = extractelement <4 x i32> %x.sroa.0.036.i.i, i64 %idxprom.i.i ; read the dynamically indexed lane of %x.sroa.0.036.i.i
+  %4 = insertelement <4 x i32> %x.sroa.0.036.i.i, i32 %3, i64 0 ; store that value into constant lane 0
+  br label %for.body.i.i ; unconditional back-edge: the IR loop has no exit
+}



More information about the llvm-commits mailing list