[llvm] [AMDGPU] Add test for readfirstlane with i1 type (PR #109657)

Piotr Sobczak via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 23 06:22:56 PDT 2024


https://github.com/piotrAMD updated https://github.com/llvm/llvm-project/pull/109657

>From 56ed901a32d453dee9c69845b0e647e9f20b239d Mon Sep 17 00:00:00 2001
From: Piotr Sobczak <piotr.sobczak at amd.com>
Date: Mon, 23 Sep 2024 14:05:27 +0200
Subject: [PATCH 1/2] [AMDGPU] Add test for readfirstlane with i1 type

Add test for readfirstlane with i1 type to demonstrate the lowering works.

Also simplify existing tests a bit - the declarations are not strictly
needed anymore.
---
 .../AMDGPU/llvm.amdgcn.readfirstlane.ll       | 91 +++++++++++++++----
 1 file changed, 71 insertions(+), 20 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
index b061d53de5d3c5..9cb3ffeb26b41e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
@@ -2,11 +2,65 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-SDAG -enable-var-scope %s
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -global-isel < %s | FileCheck -check-prefix=CHECK-GISEL -enable-var-scope %s
 
-declare i32 @llvm.amdgcn.readfirstlane(i32) #0
-declare i64 @llvm.amdgcn.readfirstlane.i64(i64) #0
-declare double @llvm.amdgcn.readfirstlane.f64(double) #0
+define void @test_readfirstlane_i1(ptr addrspace(1) %out, i1 %src) {
+; CHECK-SDAG-LABEL: test_readfirstlane_i1:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
+; CHECK-SDAG-NEXT:    s_and_b32 s4, s4, 1
+; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, s4
+; CHECK-SDAG-NEXT:    flat_store_byte v[0:1], v2
+; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; CHECK-GISEL-LABEL: test_readfirstlane_i1:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
+; CHECK-GISEL-NEXT:    s_and_b32 s4, s4, 1
+; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, s4
+; CHECK-GISEL-NEXT:    flat_store_byte v[0:1], v2
+; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %src)
+  store i1 %readfirstlane, ptr addrspace(1) %out, align 4
+  ret void
+}
 
-define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) #1 {
+define void @test_readfirstlane_i1_select(ptr addrspace(1) %out, i32 %src, i32 %src1) {
+; CHECK-SDAG-LABEL: test_readfirstlane_i1_select:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-SDAG-NEXT:    v_cmp_lt_u32_e32 vcc, 42, v2
+; CHECK-SDAG-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
+; CHECK-SDAG-NEXT:    s_bitcmp1_b32 s4, 0
+; CHECK-SDAG-NEXT:    s_cselect_b64 vcc, -1, 0
+; CHECK-SDAG-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
+; CHECK-SDAG-NEXT:    flat_store_dword v[0:1], v2
+; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; CHECK-GISEL-LABEL: test_readfirstlane_i1_select:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-GISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 42, v2
+; CHECK-GISEL-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
+; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v4
+; CHECK-GISEL-NEXT:    s_and_b32 s4, 1, s4
+; CHECK-GISEL-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s4
+; CHECK-GISEL-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
+; CHECK-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = icmp ugt i32 %src, 42
+  %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %cmp)
+  %sel = select i1 %readfirstlane, i32 %src, i32 %src1
+  store i32 %sel, ptr addrspace(1) %out, align 4
+  ret void
+}
+
+define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_i32:
 ; CHECK-SDAG:       ; %bb.0:
 ; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -29,7 +83,7 @@ define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) #1 {
   ret void
 }
 
-define void @test_readfirstlane_i64(ptr addrspace(1) %out, i64 %src) #1 {
+define void @test_readfirstlane_i64(ptr addrspace(1) %out, i64 %src) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_i64:
 ; CHECK-SDAG:       ; %bb.0:
 ; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -56,7 +110,7 @@ define void @test_readfirstlane_i64(ptr addrspace(1) %out, i64 %src) #1 {
   ret void
 }
 
-define void @test_readfirstlane_f64(ptr addrspace(1) %out, double %src) #1 {
+define void @test_readfirstlane_f64(ptr addrspace(1) %out, double %src) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_f64:
 ; CHECK-SDAG:       ; %bb.0:
 ; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -83,7 +137,7 @@ define void @test_readfirstlane_f64(ptr addrspace(1) %out, double %src) #1 {
   ret void
 }
 
-define amdgpu_kernel void @test_readfirstlane_imm_i32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm_i32(ptr addrspace(1) %out) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_i32:
 ; CHECK-SDAG:       ; %bb.0:
 ; CHECK-SDAG-NEXT:    s_mov_b32 s0, 32
@@ -104,7 +158,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_i32(ptr addrspace(1) %out) #1
   ret void
 }
 
-define amdgpu_kernel void @test_readfirstlane_imm_i64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm_i64(ptr addrspace(1) %out) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_i64:
 ; CHECK-SDAG:       ; %bb.0:
 ; CHECK-SDAG-NEXT:    s_mov_b64 s[0:1], 32
@@ -125,7 +179,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_i64(ptr addrspace(1) %out) #1
   ret void
 }
 
-define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_f64:
 ; CHECK-SDAG:       ; %bb.0:
 ; CHECK-SDAG-NEXT:    s_mov_b32 s0, 0
@@ -148,7 +202,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) #1
   ret void
 }
 
-define amdgpu_kernel void @test_readfirstlane_imm_fold_i32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm_fold_i32(ptr addrspace(1) %out) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i32:
 ; CHECK-SDAG:       ; %bb.0:
 ; CHECK-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -173,7 +227,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold_i32(ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i64:
 ; CHECK-SDAG:       ; %bb.0:
 ; CHECK-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -201,7 +255,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_f64:
 ; CHECK-SDAG:       ; %bb.0:
 ; CHECK-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -230,7 +284,7 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_m0:
 ; CHECK-SDAG:       ; %bb.0:
 ; CHECK-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -262,7 +316,7 @@ define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) #1 {
   ret void
 }
 
-define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i32(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i32(ptr addrspace(1) %out) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i32:
 ; CHECK-SDAG:       ; %bb.0:
 ; CHECK-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -294,7 +348,7 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i32(ptr addrspace(1
   ret void
 }
 
-define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1) %out) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i64:
 ; CHECK-SDAG:       ; %bb.0:
 ; CHECK-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -328,7 +382,7 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1
   ret void
 }
 
-define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1) %out) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_f64:
 ; CHECK-SDAG:       ; %bb.0:
 ; CHECK-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -362,7 +416,7 @@ define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1
   ret void
 }
 
-define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_fi:
 ; CHECK-SDAG:       ; %bb.0:
 ; CHECK-SDAG-NEXT:    s_add_u32 s0, s0, s15
@@ -593,6 +647,3 @@ define void @test_readfirstlane_v8i16(ptr addrspace(1) %out, <8 x i16> %src) {
   call void asm sideeffect "; use $0", "s"(<8 x i16> %x)
   ret void
 }
-
-attributes #0 = { nounwind readnone convergent }
-attributes #1 = { nounwind }

>From 4e03d7a724daf49e00cb0213a5f41f86c466d960 Mon Sep 17 00:00:00 2001
From: Piotr Sobczak <piotr.sobczak at amd.com>
Date: Mon, 23 Sep 2024 15:22:17 +0200
Subject: [PATCH 2/2] Add more tests: inreg, load

---
 .../AMDGPU/llvm.amdgcn.readfirstlane.ll       | 53 +++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
index 9cb3ffeb26b41e..39a3b1c8adc9f1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
@@ -27,6 +27,29 @@ define void @test_readfirstlane_i1(ptr addrspace(1) %out, i1 %src) {
   ret void
 }
 
+define void @test_readfirstlane_i1_inreg(ptr addrspace(1) %out, i1 inreg %src) {
+; CHECK-SDAG-LABEL: test_readfirstlane_i1_inreg:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-SDAG-NEXT:    s_and_b32 s4, s6, 1
+; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, s4
+; CHECK-SDAG-NEXT:    flat_store_byte v[0:1], v2
+; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; CHECK-GISEL-LABEL: test_readfirstlane_i1_inreg:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-GISEL-NEXT:    s_and_b32 s4, s6, 1
+; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, s4
+; CHECK-GISEL-NEXT:    flat_store_byte v[0:1], v2
+; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %src)
+  store i1 %readfirstlane, ptr addrspace(1) %out, align 4
+  ret void
+}
+
 define void @test_readfirstlane_i1_select(ptr addrspace(1) %out, i32 %src, i32 %src1) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_i1_select:
 ; CHECK-SDAG:       ; %bb.0:
@@ -60,6 +83,36 @@ define void @test_readfirstlane_i1_select(ptr addrspace(1) %out, i32 %src, i32 %
   ret void
 }
 
+define void @test_readfirstlane_i1_load(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; CHECK-SDAG-LABEL: test_readfirstlane_i1_load:
+; CHECK-SDAG:       ; %bb.0:
+; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-SDAG-NEXT:    flat_load_ubyte v2, v[2:3]
+; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
+; CHECK-SDAG-NEXT:    s_and_b32 s4, s4, 1
+; CHECK-SDAG-NEXT:    v_mov_b32_e32 v2, s4
+; CHECK-SDAG-NEXT:    flat_store_byte v[0:1], v2
+; CHECK-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; CHECK-GISEL-LABEL: test_readfirstlane_i1_load:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-GISEL-NEXT:    flat_load_ubyte v2, v[2:3]
+; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-GISEL-NEXT:    v_readfirstlane_b32 s4, v2
+; CHECK-GISEL-NEXT:    s_and_b32 s4, s4, 1
+; CHECK-GISEL-NEXT:    v_mov_b32_e32 v2, s4
+; CHECK-GISEL-NEXT:    flat_store_byte v[0:1], v2
+; CHECK-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %load = load i1, ptr addrspace(1) %in
+  %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %load)
+  store i1 %readfirstlane, ptr addrspace(1) %out, align 4
+  ret void
+}
+
 define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) {
 ; CHECK-SDAG-LABEL: test_readfirstlane_i32:
 ; CHECK-SDAG:       ; %bb.0:



More information about the llvm-commits mailing list