[llvm] [NFC][GlobalISel] Pre-commit GISel AMDGPU tests for XOR, OR, AND (PR #138586)

via llvm-commits llvm-commits at lists.llvm.org
Mon May 5 14:08:06 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-amdgpu

Author: Chinmay Deshpande (chinmaydd)

<details>
<summary>Changes</summary>

The new `and.ll`, `or.ll`, and `xor.ll` tests are modeled after `llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll`.

---

Patch is 89.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/138586.diff


3 Files Affected:

- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/and.ll (+839) 
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/or.ll (+843) 
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/xor.ll (+843) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/and.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/and.ll
new file mode 100644
index 0000000000000..85982cea602ed
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/and.ll
@@ -0,0 +1,839 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx801 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-TRUE16 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-FAKE16 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
+
+define amdgpu_ps i16 @s_and_i16(i16 inreg %num, i16 inreg %den) {
+; GCN-LABEL: s_and_i16:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_and_b32 s0, s0, s1
+; GCN-NEXT:    ; return to shader part epilog
+;
+; GFX10PLUS-LABEL: s_and_i16:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_and_b32 s0, s0, s1
+; GFX10PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: s_and_i16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_and_b32 s0, s0, s1
+; GFX12-NEXT:    ; return to shader part epilog
+  %result = and i16 %num, %den
+  ret i16 %result
+}
+
+define i16 @v_and_i16(i16 %num, i16 %den) {
+; GCN-LABEL: v_and_i16:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v0, v0, v1
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10PLUS-LABEL: v_and_i16:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10PLUS-NEXT:    v_and_b32_e32 v0, v0, v1
+; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_and_i16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_and_b32_e32 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = and i16 %num, %den
+  ret i16 %result
+}
+
+define amdgpu_ps zeroext i16 @s_and_i16_zeroext(i16 inreg zeroext %num, i16 inreg zeroext %den) {
+; GFX7-LABEL: s_and_i16_zeroext:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_and_b32 s0, s0, s1
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: s_and_i16_zeroext:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_and_b32 s0, s0, s1
+; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX9-LABEL: s_and_i16_zeroext:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_and_b32 s0, s0, s1
+; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10PLUS-LABEL: s_and_i16_zeroext:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_and_b32 s0, s0, s1
+; GFX10PLUS-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX10PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: s_and_i16_zeroext:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_and_b32 s0, s0, s1
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT:    s_and_b32 s0, s0, 0xffff
+; GFX12-NEXT:    ; return to shader part epilog
+  %result = and i16 %num, %den
+  ret i16 %result
+}
+
+define zeroext i16 @v_and_i16_zeroext(i16 zeroext %num, i16 zeroext %den) {
+; GCN-LABEL: v_and_i16_zeroext:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v0, v0, v1
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10PLUS-LABEL: v_and_i16_zeroext:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10PLUS-NEXT:    v_and_b32_e32 v0, v0, v1
+; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_and_i16_zeroext:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_and_b32_e32 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = and i16 %num, %den
+  ret i16 %result
+}
+
+define amdgpu_ps signext i16 @s_and_i16_signext(i16 inreg signext %num, i16 inreg signext %den) {
+; GFX7-LABEL: s_and_i16_signext:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_and_b32 s0, s0, s1
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: s_and_i16_signext:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_and_b32 s0, s0, s1
+; GFX8-NEXT:    s_sext_i32_i16 s0, s0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX9-LABEL: s_and_i16_signext:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_and_b32 s0, s0, s1
+; GFX9-NEXT:    s_sext_i32_i16 s0, s0
+; GFX9-NEXT:    ; return to shader part epilog
+;
+; GFX10PLUS-LABEL: s_and_i16_signext:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_and_b32 s0, s0, s1
+; GFX10PLUS-NEXT:    s_sext_i32_i16 s0, s0
+; GFX10PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: s_and_i16_signext:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_and_b32 s0, s0, s1
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT:    s_sext_i32_i16 s0, s0
+; GFX12-NEXT:    ; return to shader part epilog
+  %result = and i16 %num, %den
+  ret i16 %result
+}
+
+define signext i16 @v_and_i16_signext(i16 signext %num, i16 signext %den) {
+; GCN-LABEL: v_and_i16_signext:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v0, v0, v1
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10PLUS-LABEL: v_and_i16_signext:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10PLUS-NEXT:    v_and_b32_e32 v0, v0, v1
+; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_and_i16_signext:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_and_b32_e32 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = and i16 %num, %den
+  ret i16 %result
+}
+
+define amdgpu_ps i32 @s_and_i32(i32 inreg %num, i32 inreg %den) {
+; GCN-LABEL: s_and_i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_and_b32 s0, s0, s1
+; GCN-NEXT:    ; return to shader part epilog
+;
+; GFX10PLUS-LABEL: s_and_i32:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_and_b32 s0, s0, s1
+; GFX10PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: s_and_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_and_b32 s0, s0, s1
+; GFX12-NEXT:    ; return to shader part epilog
+  %result = and i32 %num, %den
+  ret i32 %result
+}
+
+define i32 @v_and_i32(i32 %num, i32 %den) {
+; GCN-LABEL: v_and_i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v0, v0, v1
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10PLUS-LABEL: v_and_i32:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10PLUS-NEXT:    v_and_b32_e32 v0, v0, v1
+; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_and_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_and_b32_e32 v0, v0, v1
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = and i32 %num, %den
+  ret i32 %result
+}
+
+define amdgpu_ps <2 x i32> @s_and_v2i32(<2 x i32> inreg %num, <2 x i32> inreg %den) {
+; GCN-LABEL: s_and_v2i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
+; GCN-NEXT:    ; return to shader part epilog
+;
+; GFX10PLUS-LABEL: s_and_v2i32:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
+; GFX10PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: s_and_v2i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
+; GFX12-NEXT:    ; return to shader part epilog
+  %result = and <2 x i32> %num, %den
+  ret <2 x i32> %result
+}
+
+define <2 x i32> @v_and_v2i32(<2 x i32> %num, <2 x i32> %den) {
+; GCN-LABEL: v_and_v2i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v0, v0, v2
+; GCN-NEXT:    v_and_b32_e32 v1, v1, v3
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10PLUS-LABEL: v_and_v2i32:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10PLUS-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX10PLUS-NEXT:    v_and_b32_e32 v1, v1, v3
+; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_and_v2i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX12-NEXT:    v_and_b32_e32 v1, v1, v3
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = and <2 x i32> %num, %den
+  ret <2 x i32> %result
+}
+
+define amdgpu_cs i33 @s_and_i33(i33 inreg %num,  i33 inreg %den) {
+; GCN-LABEL: s_and_i33:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
+; GCN-NEXT:    ; return to shader part epilog
+;
+; GFX10PLUS-LABEL: s_and_i33:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
+; GFX10PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: s_and_i33:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
+; GFX12-NEXT:    ; return to shader part epilog
+  %result = and i33 %num, %den
+  ret i33 %result
+}
+
+define amdgpu_ps i64 @s_and_i64(i64 inreg %num, i64 inreg %den) {
+; GCN-LABEL: s_and_i64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
+; GCN-NEXT:    ; return to shader part epilog
+;
+; GFX10PLUS-LABEL: s_and_i64:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
+; GFX10PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: s_and_i64:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
+; GFX12-NEXT:    ; return to shader part epilog
+  %result = and i64 %num, %den
+  ret i64 %result
+}
+
+define i64 @v_and_i64(i64 %num, i64 %den) {
+; GCN-LABEL: v_and_i64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v0, v0, v2
+; GCN-NEXT:    v_and_b32_e32 v1, v1, v3
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10PLUS-LABEL: v_and_i64:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10PLUS-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX10PLUS-NEXT:    v_and_b32_e32 v1, v1, v3
+; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_and_i64:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX12-NEXT:    v_and_b32_e32 v1, v1, v3
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = and i64 %num, %den
+  ret i64 %result
+}
+
+define amdgpu_ps <3 x i32> @s_and_i96(i96 inreg %num, i96 inreg %den) {
+; GCN-LABEL: s_and_i96:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_mov_b32 s6, s3
+; GCN-NEXT:    s_mov_b32 s7, s4
+; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], s[6:7]
+; GCN-NEXT:    s_and_b32 s2, s2, s5
+; GCN-NEXT:    ; return to shader part epilog
+;
+; GFX10PLUS-LABEL: s_and_i96:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_mov_b32 s6, s3
+; GFX10PLUS-NEXT:    s_mov_b32 s7, s4
+; GFX10PLUS-NEXT:    s_and_b32 s2, s2, s5
+; GFX10PLUS-NEXT:    s_and_b64 s[0:1], s[0:1], s[6:7]
+; GFX10PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: s_and_i96:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_mov_b32 s6, s3
+; GFX12-NEXT:    s_mov_b32 s7, s4
+; GFX12-NEXT:    s_and_b32 s2, s2, s5
+; GFX12-NEXT:    s_and_b64 s[0:1], s[0:1], s[6:7]
+; GFX12-NEXT:    ; return to shader part epilog
+  %result = and i96 %num, %den
+  %cast = bitcast i96 %result to <3 x i32>
+  ret <3 x i32> %cast
+}
+
+define i96 @v_and_i96(i96 %num, i96 %den) {
+; GCN-LABEL: v_and_i96:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v0, v0, v3
+; GCN-NEXT:    v_and_b32_e32 v1, v1, v4
+; GCN-NEXT:    v_and_b32_e32 v2, v2, v5
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10PLUS-LABEL: v_and_i96:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10PLUS-NEXT:    v_and_b32_e32 v0, v0, v3
+; GFX10PLUS-NEXT:    v_and_b32_e32 v1, v1, v4
+; GFX10PLUS-NEXT:    v_and_b32_e32 v2, v2, v5
+; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_and_i96:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_and_b32_e32 v0, v0, v3
+; GFX12-NEXT:    v_and_b32_e32 v1, v1, v4
+; GFX12-NEXT:    v_and_b32_e32 v2, v2, v5
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = and i96 %num, %den
+  ret i96 %result
+}
+
+define amdgpu_ps <4 x i32> @s_and_i128(i128 inreg %num, i128 inreg %den) {
+; GCN-LABEL: s_and_i128:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], s[4:5]
+; GCN-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
+; GCN-NEXT:    ; return to shader part epilog
+;
+; GFX10PLUS-LABEL: s_and_i128:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_and_b64 s[0:1], s[0:1], s[4:5]
+; GFX10PLUS-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
+; GFX10PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: s_and_i128:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_and_b64 s[0:1], s[0:1], s[4:5]
+; GFX12-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
+; GFX12-NEXT:    ; return to shader part epilog
+  %result = and i128 %num, %den
+  %cast = bitcast i128 %result to <4 x i32>
+  ret <4 x i32> %cast
+}
+
+define i128 @v_and_i128(i128 %num, i128 %den) {
+; GCN-LABEL: v_and_i128:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v0, v0, v4
+; GCN-NEXT:    v_and_b32_e32 v1, v1, v5
+; GCN-NEXT:    v_and_b32_e32 v2, v2, v6
+; GCN-NEXT:    v_and_b32_e32 v3, v3, v7
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10PLUS-LABEL: v_and_i128:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10PLUS-NEXT:    v_and_b32_e32 v0, v0, v4
+; GFX10PLUS-NEXT:    v_and_b32_e32 v1, v1, v5
+; GFX10PLUS-NEXT:    v_and_b32_e32 v2, v2, v6
+; GFX10PLUS-NEXT:    v_and_b32_e32 v3, v3, v7
+; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_and_i128:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_and_b32_e32 v0, v0, v4
+; GFX12-NEXT:    v_and_b32_e32 v1, v1, v5
+; GFX12-NEXT:    v_and_b32_e32 v2, v2, v6
+; GFX12-NEXT:    v_and_b32_e32 v3, v3, v7
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = and i128 %num, %den
+  ret i128 %result
+}
+
+define amdgpu_ps <8 x i32> @s_and_i256(i256 inreg %num, i256 inreg %den) {
+; GCN-LABEL: s_and_i256:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], s[8:9]
+; GCN-NEXT:    s_and_b64 s[2:3], s[2:3], s[10:11]
+; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], s[12:13]
+; GCN-NEXT:    s_and_b64 s[6:7], s[6:7], s[14:15]
+; GCN-NEXT:    ; return to shader part epilog
+;
+; GFX10PLUS-LABEL: s_and_i256:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_and_b64 s[0:1], s[0:1], s[8:9]
+; GFX10PLUS-NEXT:    s_and_b64 s[2:3], s[2:3], s[10:11]
+; GFX10PLUS-NEXT:    s_and_b64 s[4:5], s[4:5], s[12:13]
+; GFX10PLUS-NEXT:    s_and_b64 s[6:7], s[6:7], s[14:15]
+; GFX10PLUS-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: s_and_i256:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_and_b64 s[0:1], s[0:1], s[8:9]
+; GFX12-NEXT:    s_and_b64 s[2:3], s[2:3], s[10:11]
+; GFX12-NEXT:    s_and_b64 s[4:5], s[4:5], s[12:13]
+; GFX12-NEXT:    s_and_b64 s[6:7], s[6:7], s[14:15]
+; GFX12-NEXT:    ; return to shader part epilog
+  %result = and i256 %num, %den
+  %cast = bitcast i256 %result to <8 x i32>
+  ret <8 x i32> %cast
+}
+
+define i256 @v_and_i256(i256 %num, i256 %den) {
+; GCN-LABEL: v_and_i256:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v0, v0, v8
+; GCN-NEXT:    v_and_b32_e32 v1, v1, v9
+; GCN-NEXT:    v_and_b32_e32 v2, v2, v10
+; GCN-NEXT:    v_and_b32_e32 v3, v3, v11
+; GCN-NEXT:    v_and_b32_e32 v4, v4, v12
+; GCN-NEXT:    v_and_b32_e32 v5, v5, v13
+; GCN-NEXT:    v_and_b32_e32 v6, v6, v14
+; GCN-NEXT:    v_and_b32_e32 v7, v7, v15
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10PLUS-LABEL: v_and_i256:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10PLUS-NEXT:    v_and_b32_e32 v0, v0, v8
+; GFX10PLUS-NEXT:    v_and_b32_e32 v1, v1, v9
+; GFX10PLUS-NEXT:    v_and_b32_e32 v2, v2, v10
+; GFX10PLUS-NEXT:    v_and_b32_e32 v3, v3, v11
+; GFX10PLUS-NEXT:    v_and_b32_e32 v4, v4, v12
+; GFX10PLUS-NEXT:    v_and_b32_e32 v5, v5, v13
+; GFX10PLUS-NEXT:    v_and_b32_e32 v6, v6, v14
+; GFX10PLUS-NEXT:    v_and_b32_e32 v7, v7, v15
+; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_and_i256:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT:    s_wait_expcnt 0x0
+; GFX12-NEXT:    s_wait_samplecnt 0x0
+; GFX12-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    v_and_b32_e32 v0, v0, v8
+; GFX12-NEXT:    v_and_b32_e32 v1, v1, v9
+; GFX12-NEXT:    v_and_b32_e32 v2, v2, v10
+; GFX12-NEXT:    v_and_b32_e32 v3, v3, v11
+; GFX12-NEXT:    v_and_b32_e32 v4, v4, v12
+; GFX12-NEXT:    v_and_b32_e32 v5, v5, v13
+; GFX12-NEXT:    v_and_b32_e32 v6, v6, v14
+; GFX12-NEXT:    v_and_b32_e32 v7, v7, v15
+; GFX12-NEXT:    s_setpc_b64 s[30:31]
+  %result = and i256 %num, %den
+  ret i256 %result
+}
+
+define amdgpu_ps void @s_and_u64_zext_with_vregs(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; GFX7-LABEL: s_and_u64_zext_with_vregs:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_mov_b32 s2, 0
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b64 s[0:1], 0
+; GFX7-NEXT:    buffer_load_dword v2, v[2:3], s[0:3], 0 addr64
+; GFX7-NEXT:    v_mov_b32_e32 v3, 0
+; GFX7-NEXT:    s_waitcnt vmcnt(0)
+; GFX7-NEXT:    v_and_b32_e32 v2, 0x50, v2
+; GFX7-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: s_and_u64_zext_with_vregs:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    flat_load_dword v2, v[2:3]
+; GFX8-NEXT:    v_mov_b32_e32 v3, 0
+; GFX8-NEXT:    s_waitcnt vmcnt(0)
+; GFX8-NEXT:    v_and_b32_e32 v2, 0x50, v2
+; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; GFX8-NEXT:    s_endpgm
+;
+; GFX9-LABEL: s_and_u64_zext_with_vregs:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    global_load_dword v2, v[2:3], off
+; GFX9-NEXT:    v_mov_b32_e32 v3, 0
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_and_b32_e32 v2, 0x50, v2
+; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
+; GFX9-NEXT:    s_endpgm
+;
+; GFX10-LABEL: s_and_u64_zext_with_vregs:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    global_load_dword v2, v[2:3], off
+; GFX10-NEXT:    v_mov_b32_e32 v3, 0
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    v_and_b32_e32 v2, 0x50, v2
+; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_and_u64_zext_with_vregs:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    global_load_b32 v...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/138586


More information about the llvm-commits mailing list