[llvm] 6376418 - AMDGPU: Add baseline test for add64 with constant test (#122048)

via llvm-commits <llvm-commits at lists.llvm.org>
Wed Jan 8 07:30:08 PST 2025


Author: Matt Arsenault
Date: 2025-01-08T22:30:04+07:00
New Revision: 637641840d170491ad99a92a23112e0625d7c6cb

URL: https://github.com/llvm/llvm-project/commit/637641840d170491ad99a92a23112e0625d7c6cb
DIFF: https://github.com/llvm/llvm-project/commit/637641840d170491ad99a92a23112e0625d7c6cb.diff

LOG: AMDGPU: Add baseline test for add64 with constant test (#122048)

Add baseline tests for 64-bit adds and subs when the low half of
an operand is known to be 0.
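
The reduction these baseline tests are meant to pin down can be checked with
plain integer arithmetic: if the low 32 bits of the constant are all zero, the
low half of the 64-bit sum is just the low half of the variable operand, and
only the high halves need a 32-bit add. A minimal C sketch of that identity
(the helper name is illustrative, not part of the commit):

    #include <assert.h>
    #include <stdint.h>

    /* Illustrative model of: add i64:x, K (K's low 32 bits zero)
       => build_pair (add x.hi, K.hi), x.lo */
    static uint64_t add64_low32_known_zero(uint64_t x, uint64_t k) {
      assert((uint32_t)k == 0);                  /* precondition: K.lo == 0 */
      uint32_t hi = (uint32_t)(x >> 32) + (uint32_t)(k >> 32); /* 32-bit add */
      return ((uint64_t)hi << 32) | (uint32_t)x;               /* build_pair */
    }

    int main(void) {
      uint64_t x = 0x123456789abcdef0u;
      uint64_t k = 1ull << 50;                   /* low 32 bits are zero */
      assert(add64_low32_known_zero(x, k) == x + k);
      return 0;
    }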

Added: 
    llvm/test/CodeGen/AMDGPU/add64-low-32-bits-known-zero.ll
    llvm/test/CodeGen/AMDGPU/sub64-low-32-bits-known-zero.ll

Modified: 
    

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/add64-low-32-bits-known-zero.ll b/llvm/test/CodeGen/AMDGPU/add64-low-32-bits-known-zero.ll
new file mode 100644
index 00000000000000..981e33f89d956e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/add64-low-32-bits-known-zero.ll
@@ -0,0 +1,213 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+
+; Reduce a 64-bit add by a constant if we know the low 32-bits are all
+; zero.
+
+; add i64:x, K if computeTrailingZeros(K) >= 32
+; => build_pair (add x.hi, K.hi), x.lo
+
+define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_0(i64 inreg %reg) {
+; GFX9-LABEL: s_add_i64_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0x40000
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add i64 %reg, 1125899906842624 ; (1 << 50)
+  ret i64 %add
+}
+
+define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_1(i64 inreg %reg) {
+; GFX9-LABEL: s_add_i64_const_low_bits_known0_1:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 1
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add i64 %reg, 4294967296 ; (1 << 32)
+  ret i64 %add
+}
+
+define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_2(i64 inreg %reg) {
+; GFX9-LABEL: s_add_i64_const_low_bits_known0_2:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 2
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add i64 %reg, 8589934592 ; (1 << 33)
+  ret i64 %add
+}
+
+define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_3(i64 inreg %reg) {
+; GFX9-LABEL: s_add_i64_const_low_bits_known0_3:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add i64 %reg, -9223372036854775808 ; (1 << 63)
+  ret i64 %add
+}
+
+define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_4(i64 inreg %reg) {
+; GFX9-LABEL: s_add_i64_const_low_bits_known0_4:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add i64 %reg, -4294967296 ; 0xffffffff00000000
+  ret i64 %add
+}
+
+define i64 @v_add_i64_const_low_bits_known0_0(i64 %reg) {
+; GFX9-LABEL: v_add_i64_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0x40000
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add i64 %reg, 1125899906842624 ; (1 << 50)
+  ret i64 %add
+}
+
+define i64 @v_add_i64_const_low_bits_known0_1(i64 %reg) {
+; GFX9-LABEL: v_add_i64_const_low_bits_known0_1:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add i64 %reg, 4294967296 ; (1 << 32)
+  ret i64 %add
+}
+
+define i64 @v_add_i64_const_low_bits_known0_2(i64 %reg) {
+; GFX9-LABEL: v_add_i64_const_low_bits_known0_2:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add i64 %reg, 8589934592 ; (1 << 33)
+  ret i64 %add
+}
+
+define i64 @v_add_i64_const_low_bits_known0_3(i64 %reg) {
+; GFX9-LABEL: v_add_i64_const_low_bits_known0_3:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add i64 %reg, -9223372036854775808 ; (1 << 63)
+  ret i64 %add
+}
+
+define i64 @v_add_i64_const_low_bits_known0_4(i64 %reg) {
+; GFX9-LABEL: v_add_i64_const_low_bits_known0_4:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add i64 %reg, -4294967296 ; 0xffffffff00000000
+  ret i64 %add
+}
+
+define amdgpu_ps i64 @s_add_i64_const_high_bits_known0_0(i64 inreg %reg) {
+; GFX9-LABEL: s_add_i64_const_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, -1
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add i64 %reg, 4294967295 ; (1 << 32) - 1
+  ret i64 %add
+}
+
+define i64 @v_add_i64_const_high_bits_known0_0(i64 %reg) {
+; GFX9-LABEL: v_add_i64_const_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add i64 %reg, 4294967295 ; (1 << 32) - 1
+  ret i64 %add
+}
+
+define <2 x i64> @v_add_v2i64_splat_const_low_bits_known0_0(<2 x i64> %reg) {
+; GFX9-LABEL: v_add_v2i64_splat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, 0, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 1, v3, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
+  ret <2 x i64> %add
+}
+
+define <2 x i64> @v_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> %reg) {
+; GFX9-LABEL: v_add_v2i64_nonsplat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, 0, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 2, v3, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %add = add <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
+  ret <2 x i64> %add
+}
+
+define amdgpu_ps <2 x i64> @s_add_v2i64_splat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
+; GFX9-LABEL: s_add_v2i64_splat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 1
+; GFX9-NEXT:    s_add_u32 s2, s2, 0
+; GFX9-NEXT:    s_addc_u32 s3, s3, 1
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
+  ret <2 x i64> %add
+}
+
+define amdgpu_ps <2 x i64> @s_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
+; GFX9-LABEL: s_add_v2i64_nonsplat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 1
+; GFX9-NEXT:    s_add_u32 s2, s2, 0
+; GFX9-NEXT:    s_addc_u32 s3, s3, 2
+; GFX9-NEXT:    ; return to shader part epilog
+  %add = add <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
+  ret <2 x i64> %add
+}
+
+; We could reduce this to use a 32-bit add if we use computeKnownBits
+define i64 @v_add_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
+; GFX9-LABEL: v_add_i64_variable_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset.hi32 = zext i32 %offset.hi32 to i64
+  %in.high.bits = shl i64 %zext.offset.hi32, 32
+  %add = add i64 %reg, %in.high.bits
+  ret i64 %add
+}
+
+; We could reduce this to use a 32-bit add if we use computeKnownBits
+define amdgpu_ps i64 @s_add_i64_variable_high_bits_known0_0(i64 inreg %reg, i32 inreg %offset.hi32) {
+; GFX9-LABEL: s_add_i64_variable_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, s2
+; GFX9-NEXT:    ; return to shader part epilog
+  %zext.offset.hi32 = zext i32 %offset.hi32 to i64
+  %in.high.bits = shl i64 %zext.offset.hi32, 32
+  %add = add i64 %reg, %in.high.bits
+  ret i64 %add
+}
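
The sub64 file below exercises the mirrored case: when the constant's low 32
bits are zero, no borrow can propagate out of the low half, so only the high
halves need a 32-bit subtract. A minimal C sketch of that identity (again
illustrative, not code from the commit):

    #include <assert.h>
    #include <stdint.h>

    /* Illustrative model of: sub i64:x, K (K's low 32 bits zero)
       => build_pair (sub x.hi, K.hi), x.lo */
    static uint64_t sub64_low32_known_zero(uint64_t x, uint64_t k) {
      assert((uint32_t)k == 0);                  /* precondition: K.lo == 0 */
      uint32_t hi = (uint32_t)(x >> 32) - (uint32_t)(k >> 32); /* 32-bit sub */
      return ((uint64_t)hi << 32) | (uint32_t)x;               /* build_pair */
    }

    int main(void) {
      uint64_t x = 0x0000000112345678u;
      uint64_t k = 1ull << 33;                   /* low 32 bits are zero */
      assert(sub64_low32_known_zero(x, k) == x - k);
      return 0;
    }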

diff --git a/llvm/test/CodeGen/AMDGPU/sub64-low-32-bits-known-zero.ll b/llvm/test/CodeGen/AMDGPU/sub64-low-32-bits-known-zero.ll
new file mode 100644
index 00000000000000..779c4aef647b41
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sub64-low-32-bits-known-zero.ll
@@ -0,0 +1,213 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+
+; Reduce a 64-bit sub by a constant if we know the low 32-bits are all
+; zero.
+
+; sub i64:x, K if computeTrailingZeros(K) >= 32
+; => build_pair (sub x.hi, K.hi), x.lo
+
+define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_0(i64 inreg %reg) {
+; GFX9-LABEL: s_sub_i64_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0xfffc0000
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub i64 %reg, 1125899906842624 ; (1 << 50)
+  ret i64 %sub
+}
+
+define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_1(i64 inreg %reg) {
+; GFX9-LABEL: s_sub_i64_const_low_bits_known0_1:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub i64 %reg, 4294967296 ; (1 << 32)
+  ret i64 %sub
+}
+
+define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_2(i64 inreg %reg) {
+; GFX9-LABEL: s_sub_i64_const_low_bits_known0_2:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, -2
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub i64 %reg, 8589934592 ; (1 << 33)
+  ret i64 %sub
+}
+
+define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_3(i64 inreg %reg) {
+; GFX9-LABEL: s_sub_i64_const_low_bits_known0_3:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0x80000000
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub i64 %reg, -9223372036854775808 ; (1 << 63)
+  ret i64 %sub
+}
+
+define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_4(i64 inreg %reg) {
+; GFX9-LABEL: s_sub_i64_const_low_bits_known0_4:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, 1
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub i64 %reg, -4294967296 ; 0xffffffff00000000
+  ret i64 %sub
+}
+
+define i64 @v_sub_i64_const_low_bits_known0_0(i64 %reg) {
+; GFX9-LABEL: v_sub_i64_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0xfffc0000
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub i64 %reg, 1125899906842624 ; (1 << 50)
+  ret i64 %sub
+}
+
+define i64 @v_sub_i64_const_low_bits_known0_1(i64 %reg) {
+; GFX9-LABEL: v_sub_i64_const_low_bits_known0_1:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub i64 %reg, 4294967296 ; (1 << 32)
+  ret i64 %sub
+}
+
+define i64 @v_sub_i64_const_low_bits_known0_2(i64 %reg) {
+; GFX9-LABEL: v_sub_i64_const_low_bits_known0_2:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -2, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub i64 %reg, 8589934592 ; (1 << 33)
+  ret i64 %sub
+}
+
+define i64 @v_sub_i64_const_low_bits_known0_3(i64 %reg) {
+; GFX9-LABEL: v_sub_i64_const_low_bits_known0_3:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_bfrev_b32_e32 v2, 1
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub i64 %reg, -9223372036854775808 ; (1 << 63)
+  ret i64 %sub
+}
+
+define i64 @v_sub_i64_const_low_bits_known0_4(i64 %reg) {
+; GFX9-LABEL: v_sub_i64_const_low_bits_known0_4:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 1, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub i64 %reg, -4294967296 ; 0xffffffff00000000
+  ret i64 %sub
+}
+
+define amdgpu_ps i64 @s_sub_i64_const_high_bits_known0_0(i64 inreg %reg) {
+; GFX9-LABEL: s_sub_i64_const_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 1
+; GFX9-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub i64 %reg, 4294967295 ; (1 << 32) - 1
+  ret i64 %sub
+}
+
+define i64 @v_sub_i64_const_high_bits_known0_0(i64 %reg) {
+; GFX9-LABEL: v_sub_i64_const_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub i64 %reg, 4294967295 ; (1 << 32) - 1
+  ret i64 %sub
+}
+
+define <2 x i64> @v_sub_v2i64_splat_const_low_bits_known0_0(<2 x i64> %reg) {
+; GFX9-LABEL: v_sub_v2i64_splat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, 0, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
+  ret <2 x i64> %sub
+}
+
+define <2 x i64> @v_sub_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> %reg) {
+; GFX9-LABEL: v_sub_v2i64_nonsplat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, 0, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, -2, v3, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %sub = sub <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
+  ret <2 x i64> %sub
+}
+
+define amdgpu_ps <2 x i64> @s_sub_v2i64_splat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
+; GFX9-LABEL: s_sub_v2i64_splat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-NEXT:    s_add_u32 s2, s2, 0
+; GFX9-NEXT:    s_addc_u32 s3, s3, -1
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
+  ret <2 x i64> %sub
+}
+
+define amdgpu_ps <2 x i64> @s_sub_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
+; GFX9-LABEL: s_sub_v2i64_nonsplat_const_low_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_add_u32 s0, s0, 0
+; GFX9-NEXT:    s_addc_u32 s1, s1, -1
+; GFX9-NEXT:    s_add_u32 s2, s2, 0
+; GFX9-NEXT:    s_addc_u32 s3, s3, -2
+; GFX9-NEXT:    ; return to shader part epilog
+  %sub = sub <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
+  ret <2 x i64> %sub
+}
+
+; We could reduce this to use a 32-bit sub if we use computeKnownBits
+define i64 @v_sub_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
+; GFX9-LABEL: v_sub_i64_variable_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_subrev_co_u32_e32 v0, vcc, 0, v0
+; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %zext.offset.hi32 = zext i32 %offset.hi32 to i64
+  %in.high.bits = shl i64 %zext.offset.hi32, 32
+  %sub = sub i64 %reg, %in.high.bits
+  ret i64 %sub
+}
+
+; We could reduce this to use a 32-bit sub if we use computeKnownBits
+define amdgpu_ps i64 @s_sub_i64_variable_high_bits_known0_0(i64 inreg %reg, i32 inreg %offset.hi32) {
+; GFX9-LABEL: s_sub_i64_variable_high_bits_known0_0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_sub_u32 s0, s0, 0
+; GFX9-NEXT:    s_subb_u32 s1, s1, s2
+; GFX9-NEXT:    ; return to shader part epilog
+  %zext.offset.hi32 = zext i32 %offset.hi32 to i64
+  %in.high.bits = shl i64 %zext.offset.hi32, 32
+  %sub = sub i64 %reg, %in.high.bits
+  ret i64 %sub
+}
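
The *_variable_high_bits_known0_0 tests at the end of both files feed the
add/sub an operand of the form (zext i32 %h) << 32, whose low 32 bits are zero
by construction; as the in-file comments note, computeKnownBits could prove
that and permit the same reduction with a variable high half. A rough C sketch
of that shape for the add case (illustrative only, not the eventual combine):

    #include <assert.h>
    #include <stdint.h>

    /* Illustrative model of: add i64:x, ((zext i32 h) << 32)
       => build_pair (add x.hi, h), x.lo */
    static uint64_t add64_variable_high(uint64_t x, uint32_t h) {
      uint32_t hi = (uint32_t)(x >> 32) + h;     /* 32-bit add; the low half
                                                    produces no carry */
      return ((uint64_t)hi << 32) | (uint32_t)x;
    }

    int main(void) {
      uint64_t x = 0xfedcba9876543210u;
      uint32_t h = 0xdeadbeefu;
      assert(add64_variable_high(x, h) == x + ((uint64_t)h << 32));
      return 0;
    }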


        

