[llvm] [AMDGPU] Try to reuse in v_cndmask register with constant from compare. (PR #131146)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 13 07:10:19 PDT 2025


================
@@ -0,0 +1,171 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs  < %s | FileCheck %s -check-prefix=GFX9
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs  < %s | FileCheck %s -check-prefix=GFX10
+
+define float @f32_oeq_v_i(float %arg, float %arg1) {
+; GFX9-LABEL: f32_oeq_v_i:
+; GFX9:       ; %bb.0: ; %bb
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s4, 0x3e7ae148
+; GFX9-NEXT:    v_cmp_neq_f32_e32 vcc, s4, v0
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: f32_oeq_v_i:
+; GFX10:       ; %bb.0: ; %bb
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cmp_neq_f32_e32 vcc_lo, 0x3e7ae148, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x3e7ae148, v1, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  %fcmp = fcmp oeq float %arg, 0x3FCF5C2900000000
+  %select = select i1 %fcmp, float 0x3FCF5C2900000000, float %arg1
+  ret float %select
+}
+
+define float @f32_oeq_i_v(float %arg, float %arg1) {
+; GFX9-LABEL: f32_oeq_i_v:
+; GFX9:       ; %bb.0: ; %bb
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s4, 0x3e7ae148
+; GFX9-NEXT:    v_cmp_neq_f32_e32 vcc, s4, v0
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: f32_oeq_i_v:
+; GFX10:       ; %bb.0: ; %bb
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cmp_neq_f32_e32 vcc_lo, 0x3e7ae148, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x3e7ae148, v1, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  %fcmp = fcmp oeq float 0x3FCF5C2900000000, %arg
+  %select = select i1 %fcmp, float 0x3FCF5C2900000000, float %arg1
+  ret float %select
+}
+
+define float @f32_one_v_i(float %arg, float %arg1) {
+; GFX9-LABEL: f32_one_v_i:
+; GFX9:       ; %bb.0: ; %bb
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s4, 0x3e7ae148
+; GFX9-NEXT:    v_cmp_lg_f32_e32 vcc, s4, v0
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: f32_one_v_i:
+; GFX10:       ; %bb.0: ; %bb
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cmp_lg_f32_e32 vcc_lo, 0x3e7ae148, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x3e7ae148, v1, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  %fcmp = fcmp one float %arg, 0x3FCF5C2900000000
+  %select = select i1 %fcmp, float %arg1, float 0x3FCF5C2900000000
+  ret float %select
+}
+
+define float @f32_one_i_v(float %arg, float %arg1) {
+; GFX9-LABEL: f32_one_i_v:
+; GFX9:       ; %bb.0: ; %bb
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s4, 0x3e7ae148
+; GFX9-NEXT:    v_cmp_lg_f32_e32 vcc, s4, v0
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: f32_one_i_v:
+; GFX10:       ; %bb.0: ; %bb
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cmp_lg_f32_e32 vcc_lo, 0x3e7ae148, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x3e7ae148, v1, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  %fcmp = fcmp one float %arg, 0x3FCF5C2900000000
+  %select = select i1 %fcmp, float %arg1, float 0x3FCF5C2900000000
+  ret float %select
+}
+
+define i32 @i32_eq_v_i(i32 %arg, i32 %arg1) {
+; GFX9-LABEL: i32_eq_v_i:
+; GFX9:       ; %bb.0: ; %bb
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s4, 0x67932
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, s4, v0
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: i32_eq_v_i:
+; GFX10:       ; %bb.0: ; %bb
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0x67932, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x67932, v1, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  %icmp = icmp eq i32 %arg, 424242
+  %select = select i1 %icmp, i32 424242, i32 %arg1
+  ret i32 %select
+}
+
+define i32 @i32_eq_i_v(i32 %arg, i32 %arg1) {
+; GFX9-LABEL: i32_eq_i_v:
+; GFX9:       ; %bb.0: ; %bb
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s4, 0x67932
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, s4, v0
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: i32_eq_i_v:
+; GFX10:       ; %bb.0: ; %bb
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0x67932, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x67932, v1, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  %icmp = icmp eq i32 424242, %arg
+  %select = select i1 %icmp, i32 424242, i32 %arg1
+  ret i32 %select
+}
+
+define i32 @i32_ne_v_i(i32 %arg, i32 %arg1) {
+; GFX9-LABEL: i32_ne_v_i:
+; GFX9:       ; %bb.0: ; %bb
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s4, 0x67932
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, s4, v0
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: i32_ne_v_i:
+; GFX10:       ; %bb.0: ; %bb
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0x67932, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x67932, v1, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  %icmp = icmp ne i32 %arg, 424242
+  %select = select i1 %icmp, i32 %arg1, i32 424242
+  ret i32 %select
+}
+
+define i32 @i32_ne_i_v(i32 %arg, i32 %arg1) {
+; GFX9-LABEL: i32_ne_i_v:
+; GFX9:       ; %bb.0: ; %bb
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b32 s4, 0x67932
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, s4, v0
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: i32_ne_i_v:
+; GFX10:       ; %bb.0: ; %bb
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0x67932, v0
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x67932, v1, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+bb:
+  %icmp = icmp ne i32 424242, %arg
+  %select = select i1 %icmp, i32 %arg1, i32 424242
+  ret i32 %select
+}
----------------
arsenm wrote:

Test half, i16, and bfloat cases. Plus 64-bit



https://github.com/llvm/llvm-project/pull/131146


More information about the llvm-commits mailing list