[llvm] 11d844f - AMDGPU/GlobalISel: Add missing run line for gfx7 packed operations

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 31 23:31:50 PDT 2022


Author: Matt Arsenault
Date: 2022-10-31T23:31:42-07:00
New Revision: 11d844f96dfb6cf6105cdfa8ebfe518f77bb988a

URL: https://github.com/llvm/llvm-project/commit/11d844f96dfb6cf6105cdfa8ebfe518f77bb988a
DIFF: https://github.com/llvm/llvm-project/commit/11d844f96dfb6cf6105cdfa8ebfe518f77bb988a.diff

LOG: AMDGPU/GlobalISel: Add missing run line for gfx7 packed operations

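Previously this test only ran on targets with legal 16-bit operations
(the GFX8, GFX9 and GFX10 check prefixes). Add a hawaii (GFX7) run line so
the lowering of packed v2i16 adds is also covered there.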

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
index bcfdd70027226..e7e5f773129b4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
@@ -1,10 +1,18 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
 
 define <2 x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; GFX7-LABEL: v_add_v2i16:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: v_add_v2i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -30,6 +38,18 @@ define <2 x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) {
 }
 
 define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
+; GFX7-LABEL: v_add_v2i16_fneg_lhs:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
+; GFX7-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
+; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: v_add_v2i16_fneg_lhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -58,6 +78,18 @@ define <2 x i16> @v_add_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
 }
 
 define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
+; GFX7-LABEL: v_add_v2i16_fneg_rhs:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; GFX7-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX7-NEXT:    v_or_b32_e32 v2, v3, v2
+; GFX7-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: v_add_v2i16_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -86,6 +118,23 @@ define <2 x i16> @v_add_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
 }
 
 define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
+; GFX7-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
+; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
+; GFX7-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
+; GFX7-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
+; GFX7-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; GFX7-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX7-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
+; GFX7-NEXT:    v_add_i32_e32 v1, vcc, v2, v3
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: v_add_v2i16_fneg_lhs_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -117,6 +166,14 @@ define <2 x i16> @v_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
 }
 
 define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
+; GFX7-LABEL: v_add_v2i16_neg_inline_imm_splat:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    s_movk_i32 s4, 0xffc0
+; GFX7-NEXT:    v_add_i32_e32 v0, vcc, s4, v0
+; GFX7-NEXT:    v_add_i32_e32 v1, vcc, s4, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -144,6 +201,13 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
 }
 
 define <2 x i16> @v_add_v2i16_neg_inline_imm_lo(<2 x i16> %a) {
+; GFX7-LABEL: v_add_v2i16_neg_inline_imm_lo:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0xffffffc0, v0
+; GFX7-NEXT:    v_add_i32_e32 v1, vcc, 4, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_lo:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -171,6 +235,13 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_lo(<2 x i16> %a) {
 }
 
 define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
+; GFX7-LABEL: v_add_v2i16_neg_inline_imm_hi:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 4, v0
+; GFX7-NEXT:    v_add_i32_e32 v1, vcc, 0xffffffc0, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: v_add_v2i16_neg_inline_imm_hi:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -198,6 +269,16 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
 }
 
 define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
+; GFX7-LABEL: s_add_v2i16_neg_inline_imm_splat:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_sub_i32 s1, s1, 64
+; GFX7-NEXT:    s_sub_i32 s0, s0, 64
+; GFX7-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX7-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX7-NEXT:    s_lshl_b32 s1, s1, 16
+; GFX7-NEXT:    s_or_b32 s0, s0, s1
+; GFX7-NEXT:    ; return to shader part epilog
+;
 ; GFX9-LABEL: s_add_v2i16_neg_inline_imm_splat:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_lshr_b32 s1, s0, 16
@@ -230,6 +311,16 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
 }
 
 define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
+; GFX7-LABEL: s_add_v2i16_neg_inline_imm_lo:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_add_i32 s1, s1, 4
+; GFX7-NEXT:    s_sub_i32 s0, s0, 64
+; GFX7-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX7-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX7-NEXT:    s_lshl_b32 s1, s1, 16
+; GFX7-NEXT:    s_or_b32 s0, s0, s1
+; GFX7-NEXT:    ; return to shader part epilog
+;
 ; GFX9-LABEL: s_add_v2i16_neg_inline_imm_lo:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_lshr_b32 s1, s0, 16
@@ -262,6 +353,16 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
 }
 
 define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
+; GFX7-LABEL: s_add_v2i16_neg_inline_imm_hi:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_sub_i32 s1, s1, 64
+; GFX7-NEXT:    s_add_i32 s0, s0, 4
+; GFX7-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX7-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX7-NEXT:    s_lshl_b32 s1, s1, 16
+; GFX7-NEXT:    s_or_b32 s0, s0, s1
+; GFX7-NEXT:    ; return to shader part epilog
+;
 ; GFX9-LABEL: s_add_v2i16_neg_inline_imm_hi:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_lshr_b32 s1, s0, 16
@@ -294,6 +395,16 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
 }
 
 define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
+; GFX7-LABEL: s_add_v2i16:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_add_i32 s1, s1, s3
+; GFX7-NEXT:    s_add_i32 s0, s0, s2
+; GFX7-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX7-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX7-NEXT:    s_lshl_b32 s1, s1, 16
+; GFX7-NEXT:    s_or_b32 s0, s0, s1
+; GFX7-NEXT:    ; return to shader part epilog
+;
 ; GFX9-LABEL: s_add_v2i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
@@ -330,6 +441,21 @@ define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
 }
 
 define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg %b) {
+; GFX7-LABEL: s_add_v2i16_fneg_lhs:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_lshl_b32 s1, s1, 16
+; GFX7-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX7-NEXT:    s_or_b32 s0, s1, s0
+; GFX7-NEXT:    s_xor_b32 s0, s0, 0x80008000
+; GFX7-NEXT:    s_lshr_b32 s1, s0, 16
+; GFX7-NEXT:    s_add_i32 s1, s1, s3
+; GFX7-NEXT:    s_add_i32 s0, s0, s2
+; GFX7-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX7-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX7-NEXT:    s_lshl_b32 s1, s1, 16
+; GFX7-NEXT:    s_or_b32 s0, s0, s1
+; GFX7-NEXT:    ; return to shader part epilog
+;
 ; GFX9-LABEL: s_add_v2i16_fneg_lhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_xor_b32 s0, s0, 0x80008000
@@ -371,6 +497,21 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg
 }
 
 define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg %b) {
+; GFX7-LABEL: s_add_v2i16_fneg_rhs:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_lshl_b32 s3, s3, 16
+; GFX7-NEXT:    s_and_b32 s2, s2, 0xffff
+; GFX7-NEXT:    s_or_b32 s2, s3, s2
+; GFX7-NEXT:    s_xor_b32 s2, s2, 0x80008000
+; GFX7-NEXT:    s_lshr_b32 s3, s2, 16
+; GFX7-NEXT:    s_add_i32 s1, s1, s3
+; GFX7-NEXT:    s_add_i32 s0, s0, s2
+; GFX7-NEXT:    s_and_b32 s1, s1, 0xffff
+; GFX7-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX7-NEXT:    s_lshl_b32 s1, s1, 16
+; GFX7-NEXT:    s_or_b32 s0, s0, s1
+; GFX7-NEXT:    ; return to shader part epilog
+;
 ; GFX9-LABEL: s_add_v2i16_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_xor_b32 s1, s1, 0x80008000
@@ -412,6 +553,26 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg
 }
 
 define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x half> inreg %b) {
+; GFX7-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_lshl_b32 s1, s1, 16
+; GFX7-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX7-NEXT:    s_or_b32 s0, s1, s0
+; GFX7-NEXT:    s_lshl_b32 s1, s3, 16
+; GFX7-NEXT:    s_and_b32 s2, s2, 0xffff
+; GFX7-NEXT:    s_or_b32 s1, s1, s2
+; GFX7-NEXT:    s_xor_b32 s0, s0, 0x80008000
+; GFX7-NEXT:    s_xor_b32 s1, s1, 0x80008000
+; GFX7-NEXT:    s_lshr_b32 s2, s0, 16
+; GFX7-NEXT:    s_lshr_b32 s3, s1, 16
+; GFX7-NEXT:    s_add_i32 s2, s2, s3
+; GFX7-NEXT:    s_add_i32 s0, s0, s1
+; GFX7-NEXT:    s_and_b32 s1, s2, 0xffff
+; GFX7-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX7-NEXT:    s_lshl_b32 s1, s1, 16
+; GFX7-NEXT:    s_or_b32 s0, s0, s1
+; GFX7-NEXT:    ; return to shader part epilog
+;
 ; GFX9-LABEL: s_add_v2i16_fneg_lhs_fneg_rhs:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_xor_b32 s0, s0, 0x80008000


        


More information about the llvm-commits mailing list