[llvm] [AMDGPU] In instruction selector, allow copy from physical reg to s1 (PR #96157)

Jun Wang via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 20 11:27:10 PDT 2024


https://github.com/jwanggit86 updated https://github.com/llvm/llvm-project/pull/96157

>From a804efc3d419b8f9e6396ab69c6d669a79a3f9f5 Mon Sep 17 00:00:00 2001
From: Jun Wang <jun.wang7 at amd.com>
Date: Thu, 20 Jun 2024 05:12:23 -0500
Subject: [PATCH 1/2] [AMDGPU] In instruction selector, allow copy from
 physical reg to s1

In a planned calling convention update, i1 arguments/returns are assigned
to SGPRs without being promoted to i32. The instruction selector must
therefore allow a COPY from a physical register to an s1 destination.
---
 llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index f35bb204cbbdb7..ab71ea5b3c8e7a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -131,6 +131,16 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
   Register SrcReg = Src.getReg();
 
   if (isVCC(DstReg, *MRI)) {
+    // In planned update of calling convention, i1 arguments/returns are
+    // assigned to SGPRs without promoting to i32. The following if statement
+    // allows insturctions such as "%0:sreg_64_xexec(s1) = COPY $sgpr4_sgpr5"
+    // to be accepted.
+    if (SrcReg.isPhysical() && SrcReg != AMDGPU::SCC) {
+      const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
+      if (DstRC)
+        return DstRC->contains(SrcReg);
+    }
+
     if (SrcReg == AMDGPU::SCC) {
       const TargetRegisterClass *RC
         = TRI.getConstrainedRegClassForOperand(Dst, *MRI);

>From bb216c13b7f493a0e7eddbc4965e0ce38ebaa842 Mon Sep 17 00:00:00 2001
From: Jun Wang <jun.wang7 at amd.com>
Date: Tue, 20 Aug 2024 13:26:06 -0500
Subject: [PATCH 2/2] Add MIR tests.

---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      |   5 +-
 ...inst-select-copy-sgpr-to-s1-wave32-err.mir |  19 +++
 .../inst-select-copy-sgpr-to-s1-wave32.mir    | 138 ++++++++++++++++++
 ...inst-select-copy-sgpr-to-s1-wave64-err.mir |  19 +++
 .../inst-select-copy-sgpr-to-s1-wave64.mir    | 138 ++++++++++++++++++
 5 files changed, 315 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32-err.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64-err.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index ab71ea5b3c8e7a..0ae743bf6c744c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -131,10 +131,7 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
   Register SrcReg = Src.getReg();
 
   if (isVCC(DstReg, *MRI)) {
-    // In planned update of calling convention, i1 arguments/returns are
-    // assigned to SGPRs without promoting to i32. The following if statement
-    // allows insturctions such as "%0:sreg_64_xexec(s1) = COPY $sgpr4_sgpr5"
-    // to be accepted.
+    // Allow copy from physical register other than SCC to s1.
     if (SrcReg.isPhysical() && SrcReg != AMDGPU::SCC) {
       const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
       if (DstRC)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32-err.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32-err.mir
new file mode 100644
index 00000000000000..5f94625a9dd369
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32-err.mir
@@ -0,0 +1,19 @@
+# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+# ERR: LLVM ERROR: cannot select: %{{[0-9]+}}:sreg_32_xm0_xexec(s1) = COPY $sgpr4_sgpr5 (in function: copy_sgpr_to_s1_vcc)
+
+---
+name: copy_sgpr_to_s1_vcc
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+    %0:vcc(s1) = COPY $sgpr4_sgpr5
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:vcc(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32.mir
new file mode 100644
index 00000000000000..e2802f319cdf01
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32.mir
@@ -0,0 +1,138 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
+
+---
+name: copy_sgpr_to_s1_vcc
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; WAVE32-LABEL: name: copy_sgpr_to_s1_vcc
+    ; WAVE32: liveins: $sgpr0
+    ; WAVE32-NEXT: {{  $}}
+    ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY $sgpr0
+    ; WAVE32-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE32-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vcc(s1) = COPY $sgpr0
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:vcc(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
+---
+name: copy_sgpr_to_s1_sreg_32_xm0_xexec
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; WAVE32-LABEL: name: copy_sgpr_to_s1_sreg_32_xm0_xexec
+    ; WAVE32: liveins: $sgpr0
+    ; WAVE32-NEXT: {{  $}}
+    ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY $sgpr0
+    ; WAVE32-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE32-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:sreg_32_xm0_xexec(s1) = COPY $sgpr0
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:sreg_32_xm0_xexec(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
+---
+name: copy_sgpr_to_s1_vgpr
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; WAVE32-LABEL: name: copy_sgpr_to_s1_vgpr
+    ; WAVE32: liveins: $sgpr0
+    ; WAVE32-NEXT: {{  $}}
+    ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+    ; WAVE32-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE32-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY]]
+    ; WAVE32-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY2]], implicit $exec
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vgpr(s1) = COPY $sgpr0
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:vgpr(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
+---
+name: copy_scc_to_s1_vcc
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    ; WAVE32-LABEL: name: copy_scc_to_s1_vcc
+    ; WAVE32:      [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
+    ; WAVE32-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE32-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vcc(s1) = COPY $scc
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:vcc(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
+---
+name: copy_scc_to_s1_sreg_32_xm0_xexec
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    ; WAVE32-LABEL: name: copy_scc_to_s1_sreg_32_xm0_xexec
+    ; WAVE32:      [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
+    ; WAVE32-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE32-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:sreg_32_xm0_xexec(s1) = COPY $scc
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:sreg_32_xm0_xexec(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
+---
+name: copy_scc_to_s1_vgpr
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    ; WAVE32-LABEL: name: copy_scc_to_s1_vgpr
+    ; WAVE32:      [[COPY:%[0-9]+]]:vgpr_32 = COPY $scc
+    ; WAVE32-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE32-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY]]
+    ; WAVE32-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY2]], implicit $exec
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vgpr(s1) = COPY $scc
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:vgpr(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64-err.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64-err.mir
new file mode 100644
index 00000000000000..a8b6cff35efc12
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64-err.mir
@@ -0,0 +1,19 @@
+# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+# ERR: LLVM ERROR: cannot select: %{{[0-9]+}}:sreg_64_xexec(s1) = COPY $sgpr4 (in function: copy_sgpr_to_s1_vcc)
+
+---
+name: copy_sgpr_to_s1_vcc
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr4
+    %0:vcc(s1) = COPY $sgpr4
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:vcc(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64.mir
new file mode 100644
index 00000000000000..dd371c334c9199
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64.mir
@@ -0,0 +1,138 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
+
+---
+name: copy_sgpr_to_s1_vcc
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+    ; WAVE64-LABEL: name: copy_sgpr_to_s1_vcc
+    ; WAVE64: liveins: $sgpr4_sgpr5
+    ; WAVE64-NEXT: {{  $}}
+    ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
+    ; WAVE64-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE64-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vcc(s1) = COPY $sgpr4_sgpr5
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:vcc(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
+---
+name: copy_sgpr_to_s1_sreg_64_xexec
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+    ; WAVE64-LABEL: name: copy_sgpr_to_s1_sreg_64_xexec
+    ; WAVE64: liveins: $sgpr4_sgpr5
+    ; WAVE64-NEXT: {{  $}}
+    ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
+    ; WAVE64-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE64-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:sreg_64_xexec(s1) = COPY $sgpr4_sgpr5
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:sreg_64_xexec(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
+---
+name: copy_sgpr_to_s1_vgpr
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+    ; WAVE64-LABEL: name: copy_sgpr_to_s1_vgpr
+    ; WAVE64: liveins: $sgpr4_sgpr5
+    ; WAVE64-NEXT: {{  $}}
+    ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr4_sgpr5
+    ; WAVE64-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE64-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]]
+    ; WAVE64-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY2]], implicit $exec
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vgpr(s1) = COPY $sgpr4_sgpr5
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0(s1), %2, %3
+    S_ENDPGM 0, implicit %1(s32)
+...
+
+---
+name: copy_scc_to_s1_vcc
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    ; WAVE64-LABEL: name: copy_scc_to_s1_vcc
+    ; WAVE64:      [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $scc
+    ; WAVE64-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE64-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vcc(s1) = COPY $scc
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0(s1), %2, %3
+    S_ENDPGM 0, implicit %1(s32)
+...
+
+---
+name: copy_scc_to_s1_sreg_64_xexec
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    ; WAVE64-LABEL: name: copy_scc_to_s1_sreg_64_xexec
+    ; WAVE64:      [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $scc
+    ; WAVE64-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE64-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:sreg_64_xexec(s1) = COPY $scc
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0(s1), %2, %3
+    S_ENDPGM 0, implicit %1(s32)
+...
+
+---
+name: copy_scc_to_s1_vgpr
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    ; WAVE64-LABEL: name: copy_scc_to_s1_vgpr
+    ; WAVE64:      [[COPY:%[0-9]+]]:vgpr_32 = COPY $scc
+    ; WAVE64-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE64-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]]
+    ; WAVE64-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY2]], implicit $exec
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vgpr(s1) = COPY $scc
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0(s1), %2, %3
+    S_ENDPGM 0, implicit %1(s32)
+...



More information about the llvm-commits mailing list