[llvm] c6b4fb8 - [AMDGPU] Add gfx10 uaddsat test coverage. NFC.

Jay Foad via llvm-commits <llvm-commits at lists.llvm.org>
Thu Oct 28 02:24:17 PDT 2021


Author: Jay Foad
Date: 2021-10-28T10:24:12+01:00
New Revision: c6b4fb87c0b1742c807af8eabb3fa645071ba396

URL: https://github.com/llvm/llvm-project/commit/c6b4fb87c0b1742c807af8eabb3fa645071ba396
DIFF: https://github.com/llvm/llvm-project/commit/c6b4fb87c0b1742c807af8eabb3fa645071ba396.diff

LOG: [AMDGPU] Add gfx10 uaddsat test coverage. NFC.

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/uaddsat.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
index 94710c3c10ed..172514b2a1b6 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck --check-prefix=GFX6 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
 
 define i8 @v_uaddsat_i8(i8 %lhs, i8 %rhs) {
 ; GFX6-LABEL: v_uaddsat_i8:
@@ -27,6 +28,17 @@ define i8 @v_uaddsat_i8(i8 %lhs, i8 %rhs) {
 ; GFX9-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
 ; GFX9-NEXT:    v_min_u16_e32 v0, 0xff, v0
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_uaddsat_i8:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    s_movk_i32 s4, 0xff
+; GFX10-NEXT:    v_and_b32_e32 v1, s4, v1
+; GFX10-NEXT:    v_and_b32_e32 v0, s4, v0
+; GFX10-NEXT:    v_add_nc_u16 v0, v0, v1
+; GFX10-NEXT:    v_min_u16 v0, v0, s4
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %result = call i8 @llvm.uadd.sat.i8(i8 %lhs, i8 %rhs)
   ret i8 %result
 }
@@ -53,6 +65,13 @@ define i16 @v_uaddsat_i16(i16 %lhs, i16 %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_add_u16_e64 v0, v0, v1 clamp
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_uaddsat_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_add_nc_u16 v0, v0, v1 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %result = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %result
 }
@@ -77,6 +96,13 @@ define i32 @v_uaddsat_i32(i32 %lhs, i32 %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_add_u32_e64 v0, v0, v1 clamp
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_uaddsat_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v1 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %result = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %result
 }
@@ -112,6 +138,13 @@ define <2 x i16> @v_uaddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_pk_add_u16 v0, v0, v1 clamp
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_uaddsat_v2i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_pk_add_u16 v0, v0, v1 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %result = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
   ret <2 x i16> %result
 }
@@ -154,6 +187,14 @@ define <3 x i16> @v_uaddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
 ; GFX9-NEXT:    v_pk_add_u16 v1, v1, v3 clamp
 ; GFX9-NEXT:    v_pk_add_u16 v0, v0, v2 clamp
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_uaddsat_v3i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_pk_add_u16 v0, v0, v2 clamp
+; GFX10-NEXT:    v_pk_add_u16 v1, v1, v3 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %result = call <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs)
   ret <3 x i16> %result
 }
@@ -202,6 +243,14 @@ define <2 x float> @v_uaddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX9-NEXT:    v_pk_add_u16 v0, v0, v2 clamp
 ; GFX9-NEXT:    v_pk_add_u16 v1, v1, v3 clamp
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_uaddsat_v4i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_pk_add_u16 v0, v0, v2 clamp
+; GFX10-NEXT:    v_pk_add_u16 v1, v1, v3 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %result = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
   %cast = bitcast <4 x i16> %result to <2 x float>
   ret <2 x float> %cast
@@ -232,6 +281,14 @@ define <2 x i32> @v_uaddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; GFX9-NEXT:    v_add_u32_e64 v0, v0, v2 clamp
 ; GFX9-NEXT:    v_add_u32_e64 v1, v1, v3 clamp
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_uaddsat_v2i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v2 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v1, v1, v3 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %result = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %result
 }
@@ -266,6 +323,15 @@ define <3 x i32> @v_uaddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
 ; GFX9-NEXT:    v_add_u32_e64 v1, v1, v4 clamp
 ; GFX9-NEXT:    v_add_u32_e64 v2, v2, v5 clamp
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_uaddsat_v3i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v3 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v1, v1, v4 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v2, v2, v5 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %result = call <3 x i32> @llvm.uadd.sat.v3i32(<3 x i32> %lhs, <3 x i32> %rhs)
   ret <3 x i32> %result
 }
@@ -305,6 +371,16 @@ define <4 x i32> @v_uaddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
 ; GFX9-NEXT:    v_add_u32_e64 v2, v2, v6 clamp
 ; GFX9-NEXT:    v_add_u32_e64 v3, v3, v7 clamp
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_uaddsat_v4i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v4 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v1, v1, v5 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v2, v2, v6 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v3, v3, v7 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %result = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
   ret <4 x i32> %result
 }
@@ -364,6 +440,20 @@ define <8 x i32> @v_uaddsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
 ; GFX9-NEXT:    v_add_u32_e64 v6, v6, v14 clamp
 ; GFX9-NEXT:    v_add_u32_e64 v7, v7, v15 clamp
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_uaddsat_v8i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v8 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v1, v1, v9 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v2, v2, v10 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v3, v3, v11 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v4, v4, v12 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v5, v5, v13 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v6, v6, v14 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v7, v7, v15 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %result = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %lhs, <8 x i32> %rhs)
   ret <8 x i32> %result
 }
@@ -463,6 +553,28 @@ define <16 x i32> @v_uaddsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
 ; GFX9-NEXT:    v_add_u32_e64 v14, v14, v30 clamp
 ; GFX9-NEXT:    v_add_u32_e64 v15, v15, v31 clamp
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_uaddsat_v16i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v16 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v1, v1, v17 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v2, v2, v18 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v3, v3, v19 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v4, v4, v20 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v5, v5, v21 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v6, v6, v22 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v7, v7, v23 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v8, v8, v24 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v9, v9, v25 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v10, v10, v26 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v11, v11, v27 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v12, v12, v28 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v13, v13, v29 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v14, v14, v30 clamp
+; GFX10-NEXT:    v_add_nc_u32_e64 v15, v15, v31 clamp
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %result = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %lhs, <16 x i32> %rhs)
   ret <16 x i32> %result
 }
@@ -498,6 +610,17 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
 ; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_uaddsat_i64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v0, v2
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT:    v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %result = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %result
 }


        


More information about the llvm-commits mailing list