[llvm] [AMDGPU] In instruction selector, allow copy from physical reg to s1 (PR #96157)

Jun Wang via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 10 16:00:01 PST 2024


https://github.com/jwanggit86 updated https://github.com/llvm/llvm-project/pull/96157

>From 0390e45392663aa65876f69c54734d23b2b20a8a Mon Sep 17 00:00:00 2001
From: Jun Wang <jun.wang7 at amd.com>
Date: Thu, 20 Jun 2024 05:12:23 -0500
Subject: [PATCH 1/6] [AMDGPU] In instruction selector, allow copy from
 physical reg to s1

In the planned calling convention update, i1 arguments/returns are
assigned to SGPRs without being promoted to i32. We need to update the
instruction selector to allow a copy from a physical register to an s1
destination.
---
 llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 3be865f03df1fd..c5cca989bffad8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -130,6 +130,16 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
   Register SrcReg = Src.getReg();
 
   if (isVCC(DstReg, *MRI)) {
+    // In planned update of calling convention, i1 arguments/returns are
+    // assigned to SGPRs without promoting to i32. The following if statement
+    // allows insturctions such as "%0:sreg_64_xexec(s1) = COPY $sgpr4_sgpr5"
+    // to be accepted.
+    if (SrcReg.isPhysical() && SrcReg != AMDGPU::SCC) {
+      const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
+      if (DstRC)
+        return DstRC->contains(SrcReg);
+    }
+
     if (SrcReg == AMDGPU::SCC) {
       const TargetRegisterClass *RC
         = TRI.getConstrainedRegClassForOperand(Dst, *MRI);

>From 17b1da98e3eca4a7f200cd8b88589fbbc2835480 Mon Sep 17 00:00:00 2001
From: Jun Wang <jun.wang7 at amd.com>
Date: Tue, 20 Aug 2024 13:26:06 -0500
Subject: [PATCH 2/6] Add MIR tests.

---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      |   5 +-
 ...inst-select-copy-sgpr-to-s1-wave32-err.mir |  19 +++
 .../inst-select-copy-sgpr-to-s1-wave32.mir    | 138 ++++++++++++++++++
 ...inst-select-copy-sgpr-to-s1-wave64-err.mir |  19 +++
 .../inst-select-copy-sgpr-to-s1-wave64.mir    | 138 ++++++++++++++++++
 5 files changed, 315 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32-err.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64-err.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index c5cca989bffad8..d4609e4f7a18c4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -130,10 +130,7 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
   Register SrcReg = Src.getReg();
 
   if (isVCC(DstReg, *MRI)) {
-    // In planned update of calling convention, i1 arguments/returns are
-    // assigned to SGPRs without promoting to i32. The following if statement
-    // allows insturctions such as "%0:sreg_64_xexec(s1) = COPY $sgpr4_sgpr5"
-    // to be accepted.
+    // Allow copy from physical register other than SCC to s1.
     if (SrcReg.isPhysical() && SrcReg != AMDGPU::SCC) {
       const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
       if (DstRC)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32-err.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32-err.mir
new file mode 100644
index 00000000000000..5f94625a9dd369
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32-err.mir
@@ -0,0 +1,19 @@
+# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+# ERR: LLVM ERROR: cannot select: %{{[0-9]+}}:sreg_32_xm0_xexec(s1) = COPY $sgpr4_sgpr5 (in function: copy_sgpr_to_s1_vcc)
+
+---
+name: copy_sgpr_to_s1_vcc
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+    %0:vcc(s1) = COPY $sgpr4_sgpr5
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:vcc(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32.mir
new file mode 100644
index 00000000000000..e2802f319cdf01
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32.mir
@@ -0,0 +1,138 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
+
+---
+name: copy_sgpr_to_s1_vcc
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; WAVE32-LABEL: name: copy_sgpr_to_s1_vcc
+    ; WAVE32: liveins: $sgpr0
+    ; WAVE32-NEXT: {{  $}}
+    ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY $sgpr0
+    ; WAVE32-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE32-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vcc(s1) = COPY $sgpr0
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:vcc(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
+---
+name: copy_sgpr_to_s1_sreg_32_xm0_xexec
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; WAVE32-LABEL: name: copy_sgpr_to_s1_sreg_32_xm0_xexec
+    ; WAVE32: liveins: $sgpr0
+    ; WAVE32-NEXT: {{  $}}
+    ; WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY $sgpr0
+    ; WAVE32-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE32-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:sreg_32_xm0_xexec(s1) = COPY $sgpr0
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:sreg_32_xm0_xexec(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
+---
+name: copy_sgpr_to_s1_vgpr
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; WAVE32-LABEL: name: copy_sgpr_to_s1_vgpr
+    ; WAVE32: liveins: $sgpr0
+    ; WAVE32-NEXT: {{  $}}
+    ; WAVE32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+    ; WAVE32-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE32-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY]]
+    ; WAVE32-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY2]], implicit $exec
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vgpr(s1) = COPY $sgpr0
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:vgpr(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
+---
+name: copy_scc_to_s1_vcc
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    ; WAVE32-LABEL: name: copy_scc_to_s1_vcc
+    ; WAVE32:      [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
+    ; WAVE32-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE32-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vcc(s1) = COPY $scc
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:vcc(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
+---
+name: copy_scc_to_s1_sreg_32_xm0_xexec
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    ; WAVE32-LABEL: name: copy_scc_to_s1_sreg_32_xm0_xexec
+    ; WAVE32:      [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
+    ; WAVE32-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE32-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:sreg_32_xm0_xexec(s1) = COPY $scc
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:sreg_32_xm0_xexec(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
+---
+name: copy_scc_to_s1_vgpr
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    ; WAVE32-LABEL: name: copy_scc_to_s1_vgpr
+    ; WAVE32:      [[COPY:%[0-9]+]]:vgpr_32 = COPY $scc
+    ; WAVE32-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE32-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY]]
+    ; WAVE32-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY2]], implicit $exec
+    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vgpr(s1) = COPY $scc
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:vgpr(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64-err.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64-err.mir
new file mode 100644
index 00000000000000..a8b6cff35efc12
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64-err.mir
@@ -0,0 +1,19 @@
+# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+# ERR: LLVM ERROR: cannot select: %{{[0-9]+}}:sreg_64_xexec(s1) = COPY $sgpr4 (in function: copy_sgpr_to_s1_vcc)
+
+---
+name: copy_sgpr_to_s1_vcc
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr4
+    %0:vcc(s1) = COPY $sgpr4
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:vcc(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64.mir
new file mode 100644
index 00000000000000..dd371c334c9199
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64.mir
@@ -0,0 +1,138 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
+
+---
+name: copy_sgpr_to_s1_vcc
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+    ; WAVE64-LABEL: name: copy_sgpr_to_s1_vcc
+    ; WAVE64: liveins: $sgpr4_sgpr5
+    ; WAVE64-NEXT: {{  $}}
+    ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
+    ; WAVE64-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE64-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vcc(s1) = COPY $sgpr4_sgpr5
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:vcc(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
+---
+name: copy_sgpr_to_s1_sreg_64_xexec
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+    ; WAVE64-LABEL: name: copy_sgpr_to_s1_sreg_64_xexec
+    ; WAVE64: liveins: $sgpr4_sgpr5
+    ; WAVE64-NEXT: {{  $}}
+    ; WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
+    ; WAVE64-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE64-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:sreg_64_xexec(s1) = COPY $sgpr4_sgpr5
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0:sreg_64_xexec(s1), %2:vgpr, %3:vgpr
+    S_ENDPGM 0, implicit %1:vgpr(s32)
+...
+
+---
+name: copy_sgpr_to_s1_vgpr
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+    ; WAVE64-LABEL: name: copy_sgpr_to_s1_vgpr
+    ; WAVE64: liveins: $sgpr4_sgpr5
+    ; WAVE64-NEXT: {{  $}}
+    ; WAVE64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr4_sgpr5
+    ; WAVE64-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE64-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]]
+    ; WAVE64-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY2]], implicit $exec
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vgpr(s1) = COPY $sgpr4_sgpr5
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0(s1), %2, %3
+    S_ENDPGM 0, implicit %1(s32)
+...
+
+---
+name: copy_scc_to_s1_vcc
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    ; WAVE64-LABEL: name: copy_scc_to_s1_vcc
+    ; WAVE64:      [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $scc
+    ; WAVE64-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE64-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vcc(s1) = COPY $scc
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0(s1), %2, %3
+    S_ENDPGM 0, implicit %1(s32)
+...
+
+---
+name: copy_scc_to_s1_sreg_64_xexec
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    ; WAVE64-LABEL: name: copy_scc_to_s1_sreg_64_xexec
+    ; WAVE64:      [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $scc
+    ; WAVE64-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE64-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:sreg_64_xexec(s1) = COPY $scc
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0(s1), %2, %3
+    S_ENDPGM 0, implicit %1(s32)
+...
+
+---
+name: copy_scc_to_s1_vgpr
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    ; WAVE64-LABEL: name: copy_scc_to_s1_vgpr
+    ; WAVE64:      [[COPY:%[0-9]+]]:vgpr_32 = COPY $scc
+    ; WAVE64-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; WAVE64-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]]
+    ; WAVE64-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY2]], implicit $exec
+    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[VCND]]
+    ;
+    %0:vgpr(s1) = COPY $scc
+    %2:vgpr(s32) = G_CONSTANT i32 1
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %1:vgpr(s32) = G_SELECT %0(s1), %2, %3
+    S_ENDPGM 0, implicit %1(s32)
+...

>From ea0f4a07b55c031c54c4f1cc5b6457af94806d65 Mon Sep 17 00:00:00 2001
From: Jun Wang <jun.wang7 at amd.com>
Date: Tue, 8 Oct 2024 12:49:31 -0500
Subject: [PATCH 3/6] For copy from physical reg to virtual reg, add a
 condition to make sure the type of the dst reg is s1.

---
 llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index d4609e4f7a18c4..88ba7efed3490b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -131,7 +131,8 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
 
   if (isVCC(DstReg, *MRI)) {
     // Allow copy from physical register other than SCC to s1.
-    if (SrcReg.isPhysical() && SrcReg != AMDGPU::SCC) {
+    if (SrcReg.isPhysical() && SrcReg != AMDGPU::SCC &&
+        MRI->getType(DstReg) == LLT::scalar(1)) {
       const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
       if (DstRC)
         return DstRC->contains(SrcReg);

>From 3529b0696260accedfbb73746403bb5e41b1d232 Mon Sep 17 00:00:00 2001
From: Jun Wang <jun.wang7 at amd.com>
Date: Fri, 11 Oct 2024 17:45:14 -0500
Subject: [PATCH 4/6] Create a separate func to check copy from phy reg to VCC.

---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      | 40 +++++++++++++++----
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |  2 +
 2 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 88ba7efed3490b..16316ea156b6ab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -119,6 +119,34 @@ bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
          RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI);
 }
 
+// Returns true if this is a copy from physical reg to VCC.
+// The parameter IsValid further indicates if the copy is valid.
+bool AMDGPUInstructionSelector::isCopyPhysicalToVCC(
+    Register DstReg, Register SrcReg, const MachineRegisterInfo &MRI,
+    bool &IsValid) const {
+  if (DstReg.isPhysical())
+    return false;
+  if (!SrcReg.isPhysical() || SrcReg == AMDGPU::SCC)
+    return false;
+
+  auto &RegClassOrBank = MRI.getRegClassOrRegBank(DstReg);
+  const TargetRegisterClass *RC =
+      RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+  if (RC) {
+    const LLT Ty = MRI.getType(DstReg);
+    if (!Ty.isValid() || Ty.getSizeInBits() != 1)
+      return false;
+    // G_TRUNC s1 result is never vcc.
+    if (MRI.getVRegDef(DstReg)->getOpcode() == AMDGPU::G_TRUNC ||
+        !RC->hasSuperClassEq(TRI.getBoolRC()))
+      return false;
+    IsValid = RC->contains(SrcReg);
+    return true;
+  }
+
+  return false;
+}
+
 bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
   const DebugLoc &DL = I.getDebugLoc();
   MachineBasicBlock *BB = I.getParent();
@@ -129,15 +157,11 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
   Register DstReg = Dst.getReg();
   Register SrcReg = Src.getReg();
 
-  if (isVCC(DstReg, *MRI)) {
-    // Allow copy from physical register other than SCC to s1.
-    if (SrcReg.isPhysical() && SrcReg != AMDGPU::SCC &&
-        MRI->getType(DstReg) == LLT::scalar(1)) {
-      const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(DstReg);
-      if (DstRC)
-        return DstRC->contains(SrcReg);
-    }
+  bool validPhyToRCC = false;
+  if (isCopyPhysicalToVCC(DstReg, SrcReg, *MRI, validPhyToRCC))
+    return validPhyToRCC;
 
+  if (isVCC(DstReg, *MRI)) {
     if (SrcReg == AMDGPU::SCC) {
       const TargetRegisterClass *RC
         = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index d294300be40497..1b03e644f82b88 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -73,6 +73,8 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
 
   bool isInstrUniform(const MachineInstr &MI) const;
   bool isVCC(Register Reg, const MachineRegisterInfo &MRI) const;
+  bool isCopyPhysicalToVCC(Register DstReg, Register SrcReg,
+                           const MachineRegisterInfo &MRI, bool &IsValid) const;
 
   const RegisterBank *getArtifactRegBank(
     Register Reg, const MachineRegisterInfo &MRI,

>From ecfd95754690e2ddd9790d6cc9d27fef62049d53 Mon Sep 17 00:00:00 2001
From: Jun Wang <jun.wang7 at amd.com>
Date: Mon, 4 Nov 2024 17:13:07 -0600
Subject: [PATCH 5/6] Assume the MachineVerifier will handle the cases where
 the register size is incorrect. Under that assumption, a copy from a
 physical reg to VCC is allowed.

---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      | 36 +++----------------
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |  2 --
 ...inst-select-copy-sgpr-to-s1-wave32-err.mir | 19 ----------
 ...inst-select-copy-sgpr-to-s1-wave64-err.mir | 19 ----------
 4 files changed, 4 insertions(+), 72 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32-err.mir
 delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64-err.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 16316ea156b6ab..25b9a850437924 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -119,34 +119,6 @@ bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
          RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI);
 }
 
-// Returns true if this is a copy from physical reg to VCC.
-// The parameter IsValid further indicates if the copy is valid.
-bool AMDGPUInstructionSelector::isCopyPhysicalToVCC(
-    Register DstReg, Register SrcReg, const MachineRegisterInfo &MRI,
-    bool &IsValid) const {
-  if (DstReg.isPhysical())
-    return false;
-  if (!SrcReg.isPhysical() || SrcReg == AMDGPU::SCC)
-    return false;
-
-  auto &RegClassOrBank = MRI.getRegClassOrRegBank(DstReg);
-  const TargetRegisterClass *RC =
-      RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
-  if (RC) {
-    const LLT Ty = MRI.getType(DstReg);
-    if (!Ty.isValid() || Ty.getSizeInBits() != 1)
-      return false;
-    // G_TRUNC s1 result is never vcc.
-    if (MRI.getVRegDef(DstReg)->getOpcode() == AMDGPU::G_TRUNC ||
-        !RC->hasSuperClassEq(TRI.getBoolRC()))
-      return false;
-    IsValid = RC->contains(SrcReg);
-    return true;
-  }
-
-  return false;
-}
-
 bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
   const DebugLoc &DL = I.getDebugLoc();
   MachineBasicBlock *BB = I.getParent();
@@ -157,10 +129,6 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
   Register DstReg = Dst.getReg();
   Register SrcReg = Src.getReg();
 
-  bool validPhyToRCC = false;
-  if (isCopyPhysicalToVCC(DstReg, SrcReg, *MRI, validPhyToRCC))
-    return validPhyToRCC;
-
   if (isVCC(DstReg, *MRI)) {
     if (SrcReg == AMDGPU::SCC) {
       const TargetRegisterClass *RC
@@ -170,6 +138,10 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
       return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
     }
 
+    // Allow copy from physical register other than SCC to s1.
+    if (SrcReg.isPhysical() && SrcReg != AMDGPU::SCC)
+      return true;
+
     if (!isVCC(SrcReg, *MRI)) {
       // TODO: Should probably leave the copy and let copyPhysReg expand it.
       if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 1b03e644f82b88..d294300be40497 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -73,8 +73,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
 
   bool isInstrUniform(const MachineInstr &MI) const;
   bool isVCC(Register Reg, const MachineRegisterInfo &MRI) const;
-  bool isCopyPhysicalToVCC(Register DstReg, Register SrcReg,
-                           const MachineRegisterInfo &MRI, bool &IsValid) const;
 
   const RegisterBank *getArtifactRegBank(
     Register Reg, const MachineRegisterInfo &MRI,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32-err.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32-err.mir
deleted file mode 100644
index 5f94625a9dd369..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32-err.mir
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s 2>&1 | FileCheck -check-prefix=ERR %s
-
-# ERR: LLVM ERROR: cannot select: %{{[0-9]+}}:sreg_32_xm0_xexec(s1) = COPY $sgpr4_sgpr5 (in function: copy_sgpr_to_s1_vcc)
-
----
-name: copy_sgpr_to_s1_vcc
-legalized: true
-regBankSelected: true
-
-body: |
-  bb.0:
-    liveins: $sgpr4_sgpr5
-    %0:vcc(s1) = COPY $sgpr4_sgpr5
-    %2:vgpr(s32) = G_CONSTANT i32 1
-    %3:vgpr(s32) = G_CONSTANT i32 0
-    %1:vgpr(s32) = G_SELECT %0:vcc(s1), %2:vgpr, %3:vgpr
-    S_ENDPGM 0, implicit %1:vgpr(s32)
-...
-
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64-err.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64-err.mir
deleted file mode 100644
index a8b6cff35efc12..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64-err.mir
+++ /dev/null
@@ -1,19 +0,0 @@
-# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s 2>&1 | FileCheck -check-prefix=ERR %s
-
-# ERR: LLVM ERROR: cannot select: %{{[0-9]+}}:sreg_64_xexec(s1) = COPY $sgpr4 (in function: copy_sgpr_to_s1_vcc)
-
----
-name: copy_sgpr_to_s1_vcc
-legalized: true
-regBankSelected: true
-
-body: |
-  bb.0:
-    liveins: $sgpr4
-    %0:vcc(s1) = COPY $sgpr4
-    %2:vgpr(s32) = G_CONSTANT i32 1
-    %3:vgpr(s32) = G_CONSTANT i32 0
-    %1:vgpr(s32) = G_SELECT %0:vcc(s1), %2:vgpr, %3:vgpr
-    S_ENDPGM 0, implicit %1:vgpr(s32)
-...
-

>From c5f42c1f6cdba57e1d5a77e893715e0fe8b94ea7 Mon Sep 17 00:00:00 2001
From: Jun Wang <jwang86 at yahoo.com>
Date: Tue, 10 Dec 2024 15:58:26 -0800
Subject: [PATCH 6/6] For copy from physical reg, check the dst reg type is i1.

---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      |  2 +-
 .../inst-select-copy-sgpr-to-s1-wave32.mir    | 66 +------------------
 .../inst-select-copy-sgpr-to-s1-wave64.mir    | 64 ------------------
 3 files changed, 2 insertions(+), 130 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 25b9a850437924..c682069cad7845 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -139,7 +139,7 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
     }
 
     // Allow copy from physical register other than SCC to s1.
-    if (SrcReg.isPhysical() && SrcReg != AMDGPU::SCC)
+    if (SrcReg.isPhysical() && MRI->getType(DstReg) == LLT::scalar(1))
       return true;
 
     if (!isVCC(SrcReg, *MRI)) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32.mir
index e2802f319cdf01..a2b66d63c8d081 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave32.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -run-pass=instruction-select -o - %s | FileCheck -check-prefix=WAVE32 %s
 
 ---
 name: copy_sgpr_to_s1_vcc
@@ -72,67 +72,3 @@ body: |
     %1:vgpr(s32) = G_SELECT %0:vgpr(s1), %2:vgpr, %3:vgpr
     S_ENDPGM 0, implicit %1:vgpr(s32)
 ...
-
----
-name: copy_scc_to_s1_vcc
-legalized: true
-regBankSelected: true
-
-body: |
-  bb.0:
-    ; WAVE32-LABEL: name: copy_scc_to_s1_vcc
-    ; WAVE32:      [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
-    ; WAVE32-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    ; WAVE32-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE32-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
-    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[VCND]]
-    ;
-    %0:vcc(s1) = COPY $scc
-    %2:vgpr(s32) = G_CONSTANT i32 1
-    %3:vgpr(s32) = G_CONSTANT i32 0
-    %1:vgpr(s32) = G_SELECT %0:vcc(s1), %2:vgpr, %3:vgpr
-    S_ENDPGM 0, implicit %1:vgpr(s32)
-...
-
----
-name: copy_scc_to_s1_sreg_32_xm0_xexec
-legalized: true
-regBankSelected: true
-
-body: |
-  bb.0:
-    ; WAVE32-LABEL: name: copy_scc_to_s1_sreg_32_xm0_xexec
-    ; WAVE32:      [[COPY:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
-    ; WAVE32-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    ; WAVE32-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE32-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
-    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[VCND]]
-    ;
-    %0:sreg_32_xm0_xexec(s1) = COPY $scc
-    %2:vgpr(s32) = G_CONSTANT i32 1
-    %3:vgpr(s32) = G_CONSTANT i32 0
-    %1:vgpr(s32) = G_SELECT %0:sreg_32_xm0_xexec(s1), %2:vgpr, %3:vgpr
-    S_ENDPGM 0, implicit %1:vgpr(s32)
-...
-
----
-name: copy_scc_to_s1_vgpr
-legalized: true
-regBankSelected: true
-
-body: |
-  bb.0:
-    ; WAVE32-LABEL: name: copy_scc_to_s1_vgpr
-    ; WAVE32:      [[COPY:%[0-9]+]]:vgpr_32 = COPY $scc
-    ; WAVE32-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    ; WAVE32-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE32-NEXT: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec = COPY [[COPY]]
-    ; WAVE32-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY2]], implicit $exec
-    ; WAVE32-NEXT: S_ENDPGM 0, implicit [[VCND]]
-    ;
-    %0:vgpr(s1) = COPY $scc
-    %2:vgpr(s32) = G_CONSTANT i32 1
-    %3:vgpr(s32) = G_CONSTANT i32 0
-    %1:vgpr(s32) = G_SELECT %0:vgpr(s1), %2:vgpr, %3:vgpr
-    S_ENDPGM 0, implicit %1:vgpr(s32)
-...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64.mir
index dd371c334c9199..4afd97626f563f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-sgpr-to-s1-wave64.mir
@@ -72,67 +72,3 @@ body: |
     %1:vgpr(s32) = G_SELECT %0(s1), %2, %3
     S_ENDPGM 0, implicit %1(s32)
 ...
-
----
-name: copy_scc_to_s1_vcc
-legalized: true
-regBankSelected: true
-
-body: |
-  bb.0:
-    ; WAVE64-LABEL: name: copy_scc_to_s1_vcc
-    ; WAVE64:      [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $scc
-    ; WAVE64-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    ; WAVE64-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE64-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
-    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[VCND]]
-    ;
-    %0:vcc(s1) = COPY $scc
-    %2:vgpr(s32) = G_CONSTANT i32 1
-    %3:vgpr(s32) = G_CONSTANT i32 0
-    %1:vgpr(s32) = G_SELECT %0(s1), %2, %3
-    S_ENDPGM 0, implicit %1(s32)
-...
-
----
-name: copy_scc_to_s1_sreg_64_xexec
-legalized: true
-regBankSelected: true
-
-body: |
-  bb.0:
-    ; WAVE64-LABEL: name: copy_scc_to_s1_sreg_64_xexec
-    ; WAVE64:      [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $scc
-    ; WAVE64-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    ; WAVE64-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE64-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY]], implicit $exec
-    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[VCND]]
-    ;
-    %0:sreg_64_xexec(s1) = COPY $scc
-    %2:vgpr(s32) = G_CONSTANT i32 1
-    %3:vgpr(s32) = G_CONSTANT i32 0
-    %1:vgpr(s32) = G_SELECT %0(s1), %2, %3
-    S_ENDPGM 0, implicit %1(s32)
-...
-
----
-name: copy_scc_to_s1_vgpr
-legalized: true
-regBankSelected: true
-
-body: |
-  bb.0:
-    ; WAVE64-LABEL: name: copy_scc_to_s1_vgpr
-    ; WAVE64:      [[COPY:%[0-9]+]]:vgpr_32 = COPY $scc
-    ; WAVE64-NEXT: [[VMOV1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
-    ; WAVE64-NEXT: [[VMOV2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; WAVE64-NEXT: [[COPY2:%[0-9]+]]:sreg_64_xexec = COPY [[COPY]]
-    ; WAVE64-NEXT: [[VCND:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[VMOV2]], 0, [[VMOV1]], [[COPY2]], implicit $exec
-    ; WAVE64-NEXT: S_ENDPGM 0, implicit [[VCND]]
-    ;
-    %0:vgpr(s1) = COPY $scc
-    %2:vgpr(s32) = G_CONSTANT i32 1
-    %3:vgpr(s32) = G_CONSTANT i32 0
-    %1:vgpr(s32) = G_SELECT %0(s1), %2, %3
-    S_ENDPGM 0, implicit %1(s32)
-...



More information about the llvm-commits mailing list