[llvm] [AMDGPU] New AMDGPUInsertSingleUseVDST pass (PR #72388)

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 22 22:25:22 PST 2023


================
@@ -0,0 +1,626 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -march=amdgcn -mcpu=gfx1150 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s
+
+# One single-use producer.
+---
+name: one_producer
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: one_producer
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  bb.1:
+    liveins: $vgpr0, $vgpr2
+...
+
+# One single-use producer of a 64-bit value.
+---
+name: one_producer_64bit
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: one_producer_64bit
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0_vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr2_vgpr3 = V_LSHLREV_B64_e64 0, $vgpr0_vgpr1, implicit $exec
+  ; CHECK-NEXT:   $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr4_vgpr5
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    $vgpr2_vgpr3 = V_LSHLREV_B64_e64 0, $vgpr0_vgpr1, implicit $exec
+    $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec
+  bb.1:
+    liveins: $vgpr4_vgpr5
+...
+
+# Two consecutive single-use producers.
+---
+name: two_producers
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: two_producers
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  ; CHECK-NEXT:   $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr3
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
+  bb.1:
+    liveins: $vgpr0, $vgpr3
+...
+
+# Redefinitions of v0.
+---
+name: redefinitions
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: redefinitions
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  bb.0:
+    liveins: $vgpr0
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+...
+
+# One producer with no consumers.
+---
+name: no_consumer
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: no_consumer
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  bb.1:
+...
+
+# One consumer with two uses of the same value.
+---
+name: one_consumer_two_uses
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: one_consumer_two_uses
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
+  bb.1:
+    liveins: $vgpr0, $vgpr2
+...
+
+# A longer example.
+---
+name: longer_example
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: longer_example
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr3, $vgpr5, $sgpr0, $sgpr2, $sgpr4, $sgpr5, $sgpr16, $sgpr17, $sgpr18, $sgpr19
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr14 = V_MUL_F32_e32 $sgpr4, $vgpr3, implicit $exec, implicit $mode
+  ; CHECK-NEXT:   $sgpr3 = S_MUL_F16 $sgpr0, $sgpr2, implicit $mode
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr15 = V_MUL_F32_e32 $sgpr5, $vgpr3, implicit $exec, implicit $mode
+  ; CHECK-NEXT:   $vgpr17 = V_FMA_F32_e64 0, $sgpr16, 0, $vgpr5, 0, $vgpr14, 0, 0, implicit $exec, implicit $mode
+  ; CHECK-NEXT:   $sgpr1 = S_ADD_F16 $sgpr0, 15360, implicit $mode
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr15 = V_FMA_F32_e64 0, $sgpr17, 0, $vgpr5, 0, $vgpr15, 0, 0, implicit $exec, implicit $mode
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr14 = V_FMA_F32_e64 0, $sgpr18, 0, $vgpr15, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
+  ; CHECK-NEXT:   $vgpr15 = V_FMA_F32_e64 0, $sgpr19, 0, $vgpr14, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
+  ; CHECK-NEXT:   $vgpr16 = V_LOG_F32_e32 $vgpr15, implicit $exec, implicit $mode
+  ; CHECK-NEXT:   $vgpr18 = V_EXP_F32_e32 $vgpr15, implicit $exec, implicit $mode
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr16, $vgpr18
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr3, $vgpr5, $sgpr0, $sgpr2, $sgpr4, $sgpr5, $sgpr16, $sgpr17, $sgpr18, $sgpr19
+    $vgpr14 = V_MUL_F32_e32 $sgpr4, $vgpr3, implicit $exec, implicit $mode
+    $sgpr3 = S_MUL_F16 $sgpr0, $sgpr2, implicit $mode
+    $vgpr15 = V_MUL_F32_e32 $sgpr5, $vgpr3, implicit $exec, implicit $mode
+    $vgpr17 = V_FMA_F32_e64 0, $sgpr16, 0, $vgpr5, 0, $vgpr14, 0, 0, implicit $exec, implicit $mode
+    $sgpr1 = S_ADD_F16 $sgpr0, 15360, implicit $mode
+    $vgpr15 = V_FMA_F32_e64 0, $sgpr17, 0, $vgpr5, 0, $vgpr15, 0, 0, implicit $exec, implicit $mode
+    $vgpr14 = V_FMA_F32_e64 0, $sgpr18, 0, $vgpr15, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
+    $vgpr15 = V_FMA_F32_e64 0, $sgpr19, 0, $vgpr14, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
+    $vgpr16 = V_LOG_F32_e32 $vgpr15, implicit $exec, implicit $mode
+    $vgpr18 = V_EXP_F32_e32 $vgpr15, implicit $exec, implicit $mode
+  bb.1:
+    liveins: $vgpr16, $vgpr18
+...
+
+# Multiple uses of v0.
+---
+name: multiple_uses_1
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: multiple_uses_1
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr1, $vgpr2
+...
+
+# Multiple uses of v0 and redefinitions of v1 and v2.
+---
+name: multiple_uses_2
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: multiple_uses_2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr1, $vgpr2
+...
+
+# Multiple uses of all but v1.
+---
+name: multiple_uses_3
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: multiple_uses_3
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr3 = V_MOV_B32_e32 $vgpr1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr2, $vgpr3
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr1, implicit $exec
+  bb.1:
+    liveins: $vgpr2, $vgpr3
+...
+
+# Results are live-in to another basic block.
+---
+name: basic_block_1
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: basic_block_1
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.2:
+    liveins: $vgpr1, $vgpr2
+...
+
+# Result v2 has multiple uses in another basic block.
+---
+name: basic_block_2
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: basic_block_2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
+  ; CHECK-NEXT:   $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $vgpr3
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+    $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec
+  bb.1:
+    liveins: $vgpr2
+    $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
+  bb.2:
+    liveins: $vgpr3
+...
+
+# Results are redefined in another basic block.
+---
+name: basic_block_3
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: basic_block_3
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr0
+    $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
+    $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  bb.1:
+    liveins: $vgpr0, $vgpr1
+    $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  bb.2:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+...
+
+# Exec modified between producer and consumer.
+---
+name: exec_mask
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: exec_mask
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr1_vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   $exec = COPY $vgpr1_vgpr2
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  bb.0:
+    liveins: $vgpr1_vgpr2
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    $exec = COPY $vgpr1_vgpr2
----------------
perlfu wrote:

In my opinion these should be SGPRs so this is legal MIR. (A few similar VGPR to exec copies below as well.)

Given all the tests based on EXEC are assuming Wave64 then the relevant flags should be probably also be set in the llc line for the test, i.e. `-mattr=+wavefrontsize32,-wavefrontsize64` -- in case it effects the reg units for EXEC.

https://github.com/llvm/llvm-project/pull/72388


More information about the llvm-commits mailing list