[llvm] add tests for loop definition of bitconvert (PR #133052)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 26 01:44:21 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: None (Shoreshen)

<details>
<summary>Changes</summary>

All tests passed because:
1. For DAG, the patterns do not separate SReg and VReg. One sample is:
    ```
define <2 x double> @v_bitcast_v4f32_to_v2f64(<4 x float> inreg %a, i32 %b) {
  %cmp = icmp eq i32 %b, 0
  br i1 %cmp, label %cmp.true, label %cmp.false

cmp.true:
  %a1 = fadd <4 x float> %a, splat (float 1.000000e+00)
  %a2 = bitcast <4 x float> %a1 to <2 x double>
  br label %end

cmp.false:
  %a3 = bitcast <4 x float> %a to <2 x double>
  br label %end

end:
  %phi = phi <2 x double> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
  ret <2 x double> %phi
}
   ```
   It is supposed to select from the scalar register patterns, but the VReg pattern is matched instead, as follows:
    ```
Debug log:
ISEL: Starting selection on root node: t3: v2f64 = bitcast t2
ISEL: Starting pattern match
  Initial Opcode index to 440336
  Skipped scope entry (due to false predicate) at index 440339, continuing at 440367
  Skipped scope entry (due to false predicate) at index 440368, continuing at 440396
  Skipped scope entry (due to false predicate) at index 440397, continuing at 440435
  Skipped scope entry (due to false predicate) at index 440436, continuing at 440467
  Skipped scope entry (due to false predicate) at index 440468, continuing at 440499
  Skipped scope entry (due to false predicate) at index 440500, continuing at 440552
  Skipped scope entry (due to false predicate) at index 440553, continuing at 440587
  Skipped scope entry (due to false predicate) at index 440588, continuing at 440622
  Skipped scope entry (due to false predicate) at index 440623, continuing at 440657
  Skipped scope entry (due to false predicate) at index 440658, continuing at 440692
  Skipped scope entry (due to false predicate) at index 440693, continuing at 440727
  Skipped scope entry (due to false predicate) at index 440728, continuing at 440769
  Skipped scope entry (due to false predicate) at index 440770, continuing at 440798
  Skipped scope entry (due to false predicate) at index 440799, continuing at 440836
  Skipped scope entry (due to false predicate) at index 440837, continuing at 440870
  TypeSwitch[v2f64] from 440873 to 440892

Patterns:
/*440892*/    OPC_CompleteMatch, 1, 0, 
               // Src: (bitconvert:{ *:[v2f64] } VReg_128:{ *:[v4f32] }:$src0) - Complexity = 3
               // Dst: VReg_128:{ *:[v2f64] }:$src0
    ```
2. Global ISel will use `Select_COPY` to select the bitcast.

---

Patch is 1.78 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133052.diff


16 Files Affected:

- (added) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll (+2394) 
- (added) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.128bit.ll (+6084) 
- (added) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.160bit.ll (+178) 
- (added) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.16bit.ll (+556) 
- (added) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.192bit.ll (+1062) 
- (added) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.224bit.ll (+194) 
- (added) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.256bit.ll (+9118) 
- (added) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.288bit.ll (+209) 
- (added) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.320bit.ll (+220) 
- (added) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.32bit.ll (+1960) 
- (added) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.352bit.ll (+228) 
- (added) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.384bit.ll (+235) 
- (added) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.512bit.ll (+15566) 
- (added) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.64bit.ll (+4574) 
- (added) llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.96bit.ll (+163) 
- (modified) llvm/test/lit.cfg.py (+1-1) 


``````````diff
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
new file mode 100644
index 0000000000000..9134339cd1665
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.1024bit.ll
@@ -0,0 +1,2394 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc -mtriple=amdgcn -amdgpu-codegenprepare-break-large-phis-threshold=4096 < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=tonga -amdgpu-codegenprepare-break-large-phis-threshold=4096 < %s | FileCheck -check-prefixes=VI %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -amdgpu-codegenprepare-break-large-phis-threshold=4096 < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-codegenprepare-break-large-phis-threshold=4096 < %s | FileCheck -check-prefixes=GFX11 %s
+
+define <32 x float> @v_bitcast_v32i32_to_v32f32(<32 x i32> %a, i32 %b) {
+; GCN-LABEL: v_bitcast_v32i32_to_v32f32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:4
+; GCN-NEXT:    buffer_load_dword v31, off, s[0:3], s32
+; GCN-NEXT:    s_waitcnt vmcnt(1)
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v32
+; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GCN-NEXT:    s_cbranch_execz .LBB0_2
+; GCN-NEXT:  ; %bb.1: ; %cmp.true
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_add_i32_e32 v31, vcc, 3, v31
+; GCN-NEXT:    v_add_i32_e32 v30, vcc, 3, v30
+; GCN-NEXT:    v_add_i32_e32 v29, vcc, 3, v29
+; GCN-NEXT:    v_add_i32_e32 v28, vcc, 3, v28
+; GCN-NEXT:    v_add_i32_e32 v27, vcc, 3, v27
+; GCN-NEXT:    v_add_i32_e32 v26, vcc, 3, v26
+; GCN-NEXT:    v_add_i32_e32 v25, vcc, 3, v25
+; GCN-NEXT:    v_add_i32_e32 v24, vcc, 3, v24
+; GCN-NEXT:    v_add_i32_e32 v23, vcc, 3, v23
+; GCN-NEXT:    v_add_i32_e32 v22, vcc, 3, v22
+; GCN-NEXT:    v_add_i32_e32 v21, vcc, 3, v21
+; GCN-NEXT:    v_add_i32_e32 v20, vcc, 3, v20
+; GCN-NEXT:    v_add_i32_e32 v19, vcc, 3, v19
+; GCN-NEXT:    v_add_i32_e32 v18, vcc, 3, v18
+; GCN-NEXT:    v_add_i32_e32 v17, vcc, 3, v17
+; GCN-NEXT:    v_add_i32_e32 v16, vcc, 3, v16
+; GCN-NEXT:    v_add_i32_e32 v15, vcc, 3, v15
+; GCN-NEXT:    v_add_i32_e32 v14, vcc, 3, v14
+; GCN-NEXT:    v_add_i32_e32 v13, vcc, 3, v13
+; GCN-NEXT:    v_add_i32_e32 v12, vcc, 3, v12
+; GCN-NEXT:    v_add_i32_e32 v11, vcc, 3, v11
+; GCN-NEXT:    v_add_i32_e32 v10, vcc, 3, v10
+; GCN-NEXT:    v_add_i32_e32 v9, vcc, 3, v9
+; GCN-NEXT:    v_add_i32_e32 v8, vcc, 3, v8
+; GCN-NEXT:    v_add_i32_e32 v7, vcc, 3, v7
+; GCN-NEXT:    v_add_i32_e32 v6, vcc, 3, v6
+; GCN-NEXT:    v_add_i32_e32 v5, vcc, 3, v5
+; GCN-NEXT:    v_add_i32_e32 v4, vcc, 3, v4
+; GCN-NEXT:    v_add_i32_e32 v3, vcc, 3, v3
+; GCN-NEXT:    v_add_i32_e32 v2, vcc, 3, v2
+; GCN-NEXT:    v_add_i32_e32 v1, vcc, 3, v1
+; GCN-NEXT:    v_add_i32_e32 v0, vcc, 3, v0
+; GCN-NEXT:  .LBB0_2: ; %end
+; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_bitcast_v32i32_to_v32f32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v31
+; VI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
+; VI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; VI-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; VI-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
+; VI-NEXT:    s_cbranch_execz .LBB0_2
+; VI-NEXT:  ; %bb.1: ; %cmp.true
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_u32_e32 v31, vcc, 3, v31
+; VI-NEXT:    v_add_u32_e32 v30, vcc, 3, v30
+; VI-NEXT:    v_add_u32_e32 v29, vcc, 3, v29
+; VI-NEXT:    v_add_u32_e32 v28, vcc, 3, v28
+; VI-NEXT:    v_add_u32_e32 v27, vcc, 3, v27
+; VI-NEXT:    v_add_u32_e32 v26, vcc, 3, v26
+; VI-NEXT:    v_add_u32_e32 v25, vcc, 3, v25
+; VI-NEXT:    v_add_u32_e32 v24, vcc, 3, v24
+; VI-NEXT:    v_add_u32_e32 v23, vcc, 3, v23
+; VI-NEXT:    v_add_u32_e32 v22, vcc, 3, v22
+; VI-NEXT:    v_add_u32_e32 v21, vcc, 3, v21
+; VI-NEXT:    v_add_u32_e32 v20, vcc, 3, v20
+; VI-NEXT:    v_add_u32_e32 v19, vcc, 3, v19
+; VI-NEXT:    v_add_u32_e32 v18, vcc, 3, v18
+; VI-NEXT:    v_add_u32_e32 v17, vcc, 3, v17
+; VI-NEXT:    v_add_u32_e32 v16, vcc, 3, v16
+; VI-NEXT:    v_add_u32_e32 v15, vcc, 3, v15
+; VI-NEXT:    v_add_u32_e32 v14, vcc, 3, v14
+; VI-NEXT:    v_add_u32_e32 v13, vcc, 3, v13
+; VI-NEXT:    v_add_u32_e32 v12, vcc, 3, v12
+; VI-NEXT:    v_add_u32_e32 v11, vcc, 3, v11
+; VI-NEXT:    v_add_u32_e32 v10, vcc, 3, v10
+; VI-NEXT:    v_add_u32_e32 v9, vcc, 3, v9
+; VI-NEXT:    v_add_u32_e32 v8, vcc, 3, v8
+; VI-NEXT:    v_add_u32_e32 v7, vcc, 3, v7
+; VI-NEXT:    v_add_u32_e32 v6, vcc, 3, v6
+; VI-NEXT:    v_add_u32_e32 v5, vcc, 3, v5
+; VI-NEXT:    v_add_u32_e32 v4, vcc, 3, v4
+; VI-NEXT:    v_add_u32_e32 v3, vcc, 3, v3
+; VI-NEXT:    v_add_u32_e32 v2, vcc, 3, v2
+; VI-NEXT:    v_add_u32_e32 v1, vcc, 3, v1
+; VI-NEXT:    v_add_u32_e32 v0, vcc, 3, v0
+; VI-NEXT:  .LBB0_2: ; %end
+; VI-NEXT:    s_or_b64 exec, exec, s[4:5]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_bitcast_v32i32_to_v32f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v31
+; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32
+; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GFX9-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GFX9-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX9-NEXT:    s_cbranch_execz .LBB0_2
+; GFX9-NEXT:  ; %bb.1: ; %cmp.true
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_add_u32_e32 v31, 3, v31
+; GFX9-NEXT:    v_add_u32_e32 v30, 3, v30
+; GFX9-NEXT:    v_add_u32_e32 v29, 3, v29
+; GFX9-NEXT:    v_add_u32_e32 v28, 3, v28
+; GFX9-NEXT:    v_add_u32_e32 v27, 3, v27
+; GFX9-NEXT:    v_add_u32_e32 v26, 3, v26
+; GFX9-NEXT:    v_add_u32_e32 v25, 3, v25
+; GFX9-NEXT:    v_add_u32_e32 v24, 3, v24
+; GFX9-NEXT:    v_add_u32_e32 v23, 3, v23
+; GFX9-NEXT:    v_add_u32_e32 v22, 3, v22
+; GFX9-NEXT:    v_add_u32_e32 v21, 3, v21
+; GFX9-NEXT:    v_add_u32_e32 v20, 3, v20
+; GFX9-NEXT:    v_add_u32_e32 v19, 3, v19
+; GFX9-NEXT:    v_add_u32_e32 v18, 3, v18
+; GFX9-NEXT:    v_add_u32_e32 v17, 3, v17
+; GFX9-NEXT:    v_add_u32_e32 v16, 3, v16
+; GFX9-NEXT:    v_add_u32_e32 v15, 3, v15
+; GFX9-NEXT:    v_add_u32_e32 v14, 3, v14
+; GFX9-NEXT:    v_add_u32_e32 v13, 3, v13
+; GFX9-NEXT:    v_add_u32_e32 v12, 3, v12
+; GFX9-NEXT:    v_add_u32_e32 v11, 3, v11
+; GFX9-NEXT:    v_add_u32_e32 v10, 3, v10
+; GFX9-NEXT:    v_add_u32_e32 v9, 3, v9
+; GFX9-NEXT:    v_add_u32_e32 v8, 3, v8
+; GFX9-NEXT:    v_add_u32_e32 v7, 3, v7
+; GFX9-NEXT:    v_add_u32_e32 v6, 3, v6
+; GFX9-NEXT:    v_add_u32_e32 v5, 3, v5
+; GFX9-NEXT:    v_add_u32_e32 v4, 3, v4
+; GFX9-NEXT:    v_add_u32_e32 v3, 3, v3
+; GFX9-NEXT:    v_add_u32_e32 v2, 3, v2
+; GFX9-NEXT:    v_add_u32_e32 v1, 3, v1
+; GFX9-NEXT:    v_add_u32_e32 v0, 3, v0
+; GFX9-NEXT:  .LBB0_2: ; %end
+; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_bitcast_v32i32_to_v32f32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:4
+; GFX11-NEXT:    scratch_load_b32 v31, off, s32
+; GFX11-NEXT:    s_mov_b32 s0, exec_lo
+; GFX11-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-NEXT:    v_cmpx_ne_u32_e32 0, v32
+; GFX11-NEXT:    s_xor_b32 s0, exec_lo, s0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    s_and_not1_saveexec_b32 s0, s0
+; GFX11-NEXT:    s_cbranch_execz .LBB0_2
+; GFX11-NEXT:  ; %bb.1: ; %cmp.true
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    v_add_nc_u32_e32 v31, 3, v31
+; GFX11-NEXT:    v_add_nc_u32_e32 v30, 3, v30
+; GFX11-NEXT:    v_add_nc_u32_e32 v29, 3, v29
+; GFX11-NEXT:    v_add_nc_u32_e32 v28, 3, v28
+; GFX11-NEXT:    v_add_nc_u32_e32 v27, 3, v27
+; GFX11-NEXT:    v_add_nc_u32_e32 v26, 3, v26
+; GFX11-NEXT:    v_add_nc_u32_e32 v25, 3, v25
+; GFX11-NEXT:    v_add_nc_u32_e32 v24, 3, v24
+; GFX11-NEXT:    v_add_nc_u32_e32 v23, 3, v23
+; GFX11-NEXT:    v_add_nc_u32_e32 v22, 3, v22
+; GFX11-NEXT:    v_add_nc_u32_e32 v21, 3, v21
+; GFX11-NEXT:    v_add_nc_u32_e32 v20, 3, v20
+; GFX11-NEXT:    v_add_nc_u32_e32 v19, 3, v19
+; GFX11-NEXT:    v_add_nc_u32_e32 v18, 3, v18
+; GFX11-NEXT:    v_add_nc_u32_e32 v17, 3, v17
+; GFX11-NEXT:    v_add_nc_u32_e32 v16, 3, v16
+; GFX11-NEXT:    v_add_nc_u32_e32 v15, 3, v15
+; GFX11-NEXT:    v_add_nc_u32_e32 v14, 3, v14
+; GFX11-NEXT:    v_add_nc_u32_e32 v13, 3, v13
+; GFX11-NEXT:    v_add_nc_u32_e32 v12, 3, v12
+; GFX11-NEXT:    v_add_nc_u32_e32 v11, 3, v11
+; GFX11-NEXT:    v_add_nc_u32_e32 v10, 3, v10
+; GFX11-NEXT:    v_add_nc_u32_e32 v9, 3, v9
+; GFX11-NEXT:    v_add_nc_u32_e32 v8, 3, v8
+; GFX11-NEXT:    v_add_nc_u32_e32 v7, 3, v7
+; GFX11-NEXT:    v_add_nc_u32_e32 v6, 3, v6
+; GFX11-NEXT:    v_add_nc_u32_e32 v5, 3, v5
+; GFX11-NEXT:    v_add_nc_u32_e32 v4, 3, v4
+; GFX11-NEXT:    v_add_nc_u32_e32 v3, 3, v3
+; GFX11-NEXT:    v_add_nc_u32_e32 v2, 3, v2
+; GFX11-NEXT:    v_add_nc_u32_e32 v1, 3, v1
+; GFX11-NEXT:    v_add_nc_u32_e32 v0, 3, v0
+; GFX11-NEXT:  .LBB0_2: ; %end
+; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+
+cmp.true:
+  %a1 = add <32 x i32> %a, splat (i32 3)
+  %a2 = bitcast <32 x i32> %a1 to <32 x float>
+  br label %end
+
+cmp.false:
+  %a3 = bitcast <32 x i32> %a to <32 x float>
+  br label %end
+
+end:
+  %phi = phi <32 x float> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <32 x float> %phi
+}
+
+define <32 x i32> @v_bitcast_v32f32_to_v32i32(<32 x float> %a, i32 %b) {
+; GCN-LABEL: v_bitcast_v32f32_to_v32i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:4
+; GCN-NEXT:    buffer_load_dword v31, off, s[0:3], s32
+; GCN-NEXT:    s_waitcnt vmcnt(1)
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v32
+; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GCN-NEXT:    s_cbranch_execz .LBB1_2
+; GCN-NEXT:  ; %bb.1: ; %cmp.true
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_add_f32_e32 v31, 1.0, v31
+; GCN-NEXT:    v_add_f32_e32 v30, 1.0, v30
+; GCN-NEXT:    v_add_f32_e32 v29, 1.0, v29
+; GCN-NEXT:    v_add_f32_e32 v28, 1.0, v28
+; GCN-NEXT:    v_add_f32_e32 v27, 1.0, v27
+; GCN-NEXT:    v_add_f32_e32 v26, 1.0, v26
+; GCN-NEXT:    v_add_f32_e32 v25, 1.0, v25
+; GCN-NEXT:    v_add_f32_e32 v24, 1.0, v24
+; GCN-NEXT:    v_add_f32_e32 v23, 1.0, v23
+; GCN-NEXT:    v_add_f32_e32 v22, 1.0, v22
+; GCN-NEXT:    v_add_f32_e32 v21, 1.0, v21
+; GCN-NEXT:    v_add_f32_e32 v20, 1.0, v20
+; GCN-NEXT:    v_add_f32_e32 v19, 1.0, v19
+; GCN-NEXT:    v_add_f32_e32 v18, 1.0, v18
+; GCN-NEXT:    v_add_f32_e32 v17, 1.0, v17
+; GCN-NEXT:    v_add_f32_e32 v16, 1.0, v16
+; GCN-NEXT:    v_add_f32_e32 v15, 1.0, v15
+; GCN-NEXT:    v_add_f32_e32 v14, 1.0, v14
+; GCN-NEXT:    v_add_f32_e32 v13, 1.0, v13
+; GCN-NEXT:    v_add_f32_e32 v12, 1.0, v12
+; GCN-NEXT:    v_add_f32_e32 v11, 1.0, v11
+; GCN-NEXT:    v_add_f32_e32 v10, 1.0, v10
+; GCN-NEXT:    v_add_f32_e32 v9, 1.0, v9
+; GCN-NEXT:    v_add_f32_e32 v8, 1.0, v8
+; GCN-NEXT:    v_add_f32_e32 v7, 1.0, v7
+; GCN-NEXT:    v_add_f32_e32 v6, 1.0, v6
+; GCN-NEXT:    v_add_f32_e32 v5, 1.0, v5
+; GCN-NEXT:    v_add_f32_e32 v4, 1.0, v4
+; GCN-NEXT:    v_add_f32_e32 v3, 1.0, v3
+; GCN-NEXT:    v_add_f32_e32 v2, 1.0, v2
+; GCN-NEXT:    v_add_f32_e32 v1, 1.0, v1
+; GCN-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; GCN-NEXT:  .LBB1_2: ; %end
+; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_bitcast_v32f32_to_v32i32:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v31
+; VI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
+; VI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; VI-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; VI-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
+; VI-NEXT:    s_cbranch_execz .LBB1_2
+; VI-NEXT:  ; %bb.1: ; %cmp.true
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_add_f32_e32 v31, 1.0, v31
+; VI-NEXT:    v_add_f32_e32 v30, 1.0, v30
+; VI-NEXT:    v_add_f32_e32 v29, 1.0, v29
+; VI-NEXT:    v_add_f32_e32 v28, 1.0, v28
+; VI-NEXT:    v_add_f32_e32 v27, 1.0, v27
+; VI-NEXT:    v_add_f32_e32 v26, 1.0, v26
+; VI-NEXT:    v_add_f32_e32 v25, 1.0, v25
+; VI-NEXT:    v_add_f32_e32 v24, 1.0, v24
+; VI-NEXT:    v_add_f32_e32 v23, 1.0, v23
+; VI-NEXT:    v_add_f32_e32 v22, 1.0, v22
+; VI-NEXT:    v_add_f32_e32 v21, 1.0, v21
+; VI-NEXT:    v_add_f32_e32 v20, 1.0, v20
+; VI-NEXT:    v_add_f32_e32 v19, 1.0, v19
+; VI-NEXT:    v_add_f32_e32 v18, 1.0, v18
+; VI-NEXT:    v_add_f32_e32 v17, 1.0, v17
+; VI-NEXT:    v_add_f32_e32 v16, 1.0, v16
+; VI-NEXT:    v_add_f32_e32 v15, 1.0, v15
+; VI-NEXT:    v_add_f32_e32 v14, 1.0, v14
+; VI-NEXT:    v_add_f32_e32 v13, 1.0, v13
+; VI-NEXT:    v_add_f32_e32 v12, 1.0, v12
+; VI-NEXT:    v_add_f32_e32 v11, 1.0, v11
+; VI-NEXT:    v_add_f32_e32 v10, 1.0, v10
+; VI-NEXT:    v_add_f32_e32 v9, 1.0, v9
+; VI-NEXT:    v_add_f32_e32 v8, 1.0, v8
+; VI-NEXT:    v_add_f32_e32 v7, 1.0, v7
+; VI-NEXT:    v_add_f32_e32 v6, 1.0, v6
+; VI-NEXT:    v_add_f32_e32 v5, 1.0, v5
+; VI-NEXT:    v_add_f32_e32 v4, 1.0, v4
+; VI-NEXT:    v_add_f32_e32 v3, 1.0, v3
+; VI-NEXT:    v_add_f32_e32 v2, 1.0, v2
+; VI-NEXT:    v_add_f32_e32 v1, 1.0, v1
+; VI-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; VI-NEXT:  .LBB1_2: ; %end
+; VI-NEXT:    s_or_b64 exec, exec, s[4:5]
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_bitcast_v32f32_to_v32i32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:4
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v31
+; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32
+; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GFX9-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GFX9-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX9-NEXT:    s_cbranch_execz .LBB1_2
+; GFX9-NEXT:  ; %bb.1: ; %cmp.true
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_add_f32_e32 v31, 1.0, v31
+; GFX9-NEXT:    v_add_f32_e32 v30, 1.0, v30
+; GFX9-NEXT:    v_add_f32_e32 v29, 1.0, v29
+; GFX9-NEXT:    v_add_f32_e32 v28, 1.0, v28
+; GFX9-NEXT:    v_add_f32_e32 v27, 1.0, v27
+; GFX9-NEXT:    v_add_f32_e32 v26, 1.0, v26
+; GFX9-NEXT:    v_add_f32_e32 v25, 1.0, v25
+; GFX9-NEXT:    v_add_f32_e32 v24, 1.0, v24
+; GFX9-NEXT:    v_add_f32_e32 v23, 1.0, v23
+; GFX9-NEXT:    v_add_f32_e32 v22, 1.0, v22
+; GFX9-NEXT:    v_add_f32_e32 v21, 1.0, v21
+; GFX9-NEXT:    v_add_f32_e32 v20, 1.0, v20
+; GFX9-NEXT:    v_add_f32_e32 v19, 1.0, v19
+; GFX9-NEXT:    v_add_f32_e32 v18, 1.0, v18
+; GFX9-NEXT:    v_add_f32_e32 v17, 1.0, v17
+; GFX9-NEXT:    v_add_f32_e32 v16, 1.0, v16
+; GFX9-NEXT:    v_add_f32_e32 v15, 1.0, v15
+; GFX9-NEXT:    v_add_f32_e32 v14, 1.0, v14
+; GFX9-NEXT:    v_add_f32_e32 v13, 1.0, v13
+; GFX9-NEXT:    v_add_f32_e32 v12, 1.0, v12
+; GFX9-NEXT:    v_add_f32_e32 v11, 1.0, v11
+; GFX9-NEXT:    v_add_f32_e32 v10, 1.0, v10
+; GFX9-NEXT:    v_add_f32_e32 v9, 1.0, v9
+; GFX9-NEXT:    v_add_f32_e32 v8, 1.0, v8
+; GFX9-NEXT:    v_add_f32_e32 v7, 1.0, v7
+; GFX9-NEXT:    v_add_f32_e32 v6, 1.0, v6
+; GFX9-NEXT:    v_add_f32_e32 v5, 1.0, v5
+; GFX9-NEXT:    v_add_f32_e32 v4, 1.0, v4
+; GFX9-NEXT:    v_add_f32_e32 v3, 1.0, v3
+; GFX9-NEXT:    v_add_f32_e32 v2, 1.0, v2
+; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
+; GFX9-NEXT:    v_add_f32_e32 v0, 1.0, v0
+; GFX9-NEXT:  .LBB1_2: ; %end
+; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_bitcast_v32f32_to_v32i32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:4
+; GFX11-NEXT:    scratch_load_b32 v31, off, s32
+; GFX11-NEXT:    s_mov_b32 s0, exec_lo
+; GFX11-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-NEXT:    v_cmpx_ne_u32_e32 0, v32
+; GFX11-NEXT:    s_xor_b32 s0, exec_lo, s0
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    s_and_not1_saveexec_b32 s0, s0
+; GFX11-NEXT:    s_cbranch_execz .LBB1_2
+; GFX11-NEXT:  ; %bb.1: ; %cmp.true
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    v_dual_add_f32 v31, 1.0, v31 :: v_dual_add_f32 v30, 1.0, v30
+; GFX11-NEXT:    v_dual_add_f32 v29, 1.0, v29 :: v_dual_add_f32 v28, 1.0, v28
+; GFX11-NEXT:    v_dual_add_f32 v27, 1.0, v27 :: v_dual_add_f32 v26, 1.0, v26
+; GFX11-NEXT:    v_dual_add_f32 v25, 1.0, v25 :: v_dual_add_f32 v24, 1.0, v24
+; GFX11-NEXT:    v_dual_add_f32 v23, 1.0, v23 :: v_dual_add_f32 v22, 1.0, v22
+; GFX11-NEXT:    v_dual_add_f32 v21, 1.0, v21 :: v_dual_add_f32 v20, 1.0, v20
+; GFX11-NEXT:    v_dual_add_f32 v19, 1.0, v19 :: v_dual_add_f32 v18, 1.0, v18
+; GFX11-NEXT:    v_dual_add_f32 v17, 1.0, v17 :: v_dual_add_f32 v16, 1.0, v16
+; GFX11-NEXT:    v_dual_add_f32 v15, 1.0, v15 :: v_dual_add_f32 v14, 1.0, v14
+; GFX11-NEXT:    v_dual_add_f32 v13, 1.0, v13 :: v_dual_add_f32 v12, 1.0, v12
+; GFX11-NEXT:    v_dual_add_f32 v11, 1.0, v11 :: v_dual_add_f32 v10, 1.0, v10
+; GFX11-NEXT:    v_dual_add_f32 v9, 1.0, v9 :: v_dual_add_f32 v8, 1.0, v8
+; GFX11-NEXT:    v_dual_add_f32 v7, 1.0, v7 :: v_dual_add_f32 v6, 1.0, v6
+; GFX11-NEXT:    v_dual_add_f32 v5, 1.0, v5 :: v_dual_add_f32 v4, 1.0, v4
+; GFX11-NEXT:    v_dual_add_f32 v3, 1.0, v3 :: v_dual_add_f32 v2, 1.0, v2
+; GFX11-NEXT:    v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v0, 1.0, v0
+; GFX11-NEXT:  .LBB1_2: ; %end
+; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %cmp = icmp eq i32 %b, 0
+  br i1 %cmp, label %cmp.true, label %cmp.false
+
+cmp.true:
+  %a1 = fadd <32 x float> %a, splat (float 1.000000e+00)
+  %a2 = bitcast <32 x float> %a1 to <32 x i32>
+  br label %end
+
+cmp.false:
+  %a3 = bitcast <32 x float> %a to <32 x i32>
+  br label %end
+
+end:
+  %phi = phi <32 x i32> [ %a2, %cmp.true ], [ %a3, %cmp.false ]
+  ret <32 x i32> %phi
+}
+
+define <16 x i64> @v_bitcast_v32i32_to_v16i64(<32 x i32> %a, i32 %b) {
+; GCN-LABEL: v_bitcast_v32i32_to_v16i64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:4
+; GCN-NEXT:    buffer_load_dword v31, off, s[0:3], s32
+; GCN-NEXT:    s_waitcnt vmcnt(1)
+; GCN-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v32
+; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
+; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GCN-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GCN-NEXT:    s_cbranch_execz .LBB2_2
+; GCN-NEXT:  ; %bb.1: ; %cmp.true
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_add_i32_e32 v31, vcc, 3, v31
+; GCN-NEXT:    v_add_i32_e32 v30, vcc, 3, v30
+; GCN-NEXT:    v_add_i32_e32 v29, vcc, 3, v29
+; GCN-NEXT:    v_add_i32_e32 v28, vcc, 3, v28
+; GCN-NEXT:    v_add_i32_e32 v27, vcc, 3, v27
+; GCN-NEXT:    v_add_i32_e32 v26, vcc, 3, v26
+; GCN-NEXT:    v_add_i32_e32 v25, vcc, 3, v25
+; GCN-NEXT:    v_add_i32_e32 v24, vcc, 3, v24
+; GCN-NEXT:    v_add_i32_e32 v23, vcc, 3, v23
+; GCN-NEXT:    v_add_i32_e32 v22, vcc, 3, v22
+; GCN-NEXT:    v_add_i32_e32 v21, vcc, 3, v21
+; GCN-NEXT:    v_add_i32_e32 v20, vcc, 3, v20
+; GCN-NEXT:    v_add_i32_e32 v19, vcc, 3, v19
+; GCN-NEXT:    v_add_i32_e32 v18, vcc, 3, v18
+; GCN-NEXT:    v_add_i32_e32 v17, vcc, 3, v17
+; GCN-NEXT:    v_add_i32_e32 v16, vcc, 3, v16
+; GCN-NEXT:    v_add_i32_e32 v15, vcc, 3, v15
+; GCN-NEXT:    v_add_i32_e32 v14, vcc, 3, v14
+; GCN-NEXT:   ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/133052


More information about the llvm-commits mailing list