[llvm] [AMDGPU] Set register bank for i1 arguments/return values (PR #96155)

Jun Wang via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 10 21:24:52 PDT 2024


https://github.com/jwanggit86 updated https://github.com/llvm/llvm-project/pull/96155

>From 860ddbec1125a5ae07adb4ae6a2e65afbe77254b Mon Sep 17 00:00:00 2001
From: Jun Wang <jun.wang7 at amd.com>
Date: Thu, 20 Jun 2024 04:48:14 -0500
Subject: [PATCH 1/2] [AMDGPU] Set register bank for i1 arguments/return values
 for planned calling convention update

In planned work, the calling convention is to be updated such that
i1 arguments and return values are assigned to SGPRs. For this change,
we need to ensure the register banks are correctly assigned.
---
 llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 0510a1d2eff88..ddf49f153d2c6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3745,6 +3745,21 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     if (!DstBank)
       DstBank = SrcBank;
 
+    // The calling convention is to be updated such that i1 function arguments
+    // or return values are assigned to SGPRs without promoting to i32. With
+    // this, for i1 function arguments, the call of getRegBank() above gives
+    // incorrect result. We set both src and dst banks to VCCRegBank.
+    if (!MI.getOperand(1).getReg().isVirtual() &&
+        MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(1)) {
+      DstBank = SrcBank = &AMDGPU::VCCRegBank;
+    }
+
+    // Similarly, for i1 return value, the dst reg is an SReg but we need to
+    // explicitly set the reg bank to VCCRegBank.
+    if (!MI.getOperand(0).getReg().isVirtual() &&
+        SrcBank == &AMDGPU::VCCRegBank)
+      DstBank = SrcBank;
+
     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
     if (MI.getOpcode() != AMDGPU::G_FREEZE &&
         cannotCopy(*DstBank, *SrcBank, TypeSize::getFixed(Size)))

>From 7377060c0cfcf73c23ce040a0d128c1797a831d8 Mon Sep 17 00:00:00 2001
From: Jun Wang <jun.wang7 at amd.com>
Date: Wed, 10 Jul 2024 23:22:22 -0500
Subject: [PATCH 2/2] Modified the conditions for setting both SrcBank and
 DstBank to VCCRegBank in getInstrMapping(). Also created mir tests.

---
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |  23 +-
 .../AMDGPU/GlobalISel/regbankselect-copy.mir  | 272 ++++++++++++++++++
 2 files changed, 282 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index ddf49f153d2c6..a8da2a066e7ea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3742,22 +3742,19 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     const RegisterBank *SrcBank = getRegBank(MI.getOperand(1).getReg(), MRI,
                                              *TRI);
     assert(SrcBank && "src bank should have been assigned already");
-    if (!DstBank)
-      DstBank = SrcBank;
 
-    // The calling convention is to be updated such that i1 function arguments
-    // or return values are assigned to SGPRs without promoting to i32. With
-    // this, for i1 function arguments, the call of getRegBank() above gives
-    // incorrect result. We set both src and dst banks to VCCRegBank.
-    if (!MI.getOperand(1).getReg().isVirtual() &&
-        MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(1)) {
+    // For copy from a physical reg to s1 dest, the call of getRegBank() above
+    // gives incorrect result. We set both src and dst banks to VCCRegBank.
+    if (!MI.getOperand(1).getReg().isVirtual() && !DstBank &&
+        MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(1))
+      DstBank = SrcBank = &AMDGPU::VCCRegBank;
+    // For copy from s1 src to a physical reg, we set both src and dst banks to
+    // VCCRegBank.
+    else if (!MI.getOperand(0).getReg().isVirtual() &&
+             MRI.getType(MI.getOperand(1).getReg()) == LLT::scalar(1))
       DstBank = SrcBank = &AMDGPU::VCCRegBank;
-    }
 
-    // Similarly, for i1 return value, the dst reg is an SReg but we need to
-    // explicitly set the reg bank to VCCRegBank.
-    if (!MI.getOperand(0).getReg().isVirtual() &&
-        SrcBank == &AMDGPU::VCCRegBank)
+    if (!DstBank)
       DstBank = SrcBank;
 
     unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir
index 48de4838b78f9..869e2d38278fd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-copy.mir
@@ -201,3 +201,275 @@ body: |
     %2:vcc(s1) = COPY %1
     S_ENDPGM 0, implicit %2
 ...
+
+---
+name: copy_sgpr_64_to_s1
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+    ; CHECK-LABEL: name: copy_sgpr_64_to_s1
+    ; CHECK: liveins: $sgpr4_sgpr5
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5
+    ; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
+    %0:_(s1) = COPY $sgpr4_sgpr5
+    %1:_(s32) = G_ZEXT %0:_(s1)
+...
+
+---
+name: copy_sgpr_32_to_s1
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; CHECK-LABEL: name: copy_sgpr_32_to_s1
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr0
+    ; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
+    %0:_(s1) = COPY $sgpr0
+    %1:_(s32) = G_ZEXT %0:_(s1)
+...
+
+---
+name: copy2_sgpr_64_to_s1
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr4_sgpr5, $sgpr6_sgpr7
+    ; CHECK-LABEL: name: copy2_sgpr_64_to_s1
+    ; CHECK: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY $sgpr6_sgpr7
+    ; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY1]](s1), [[CONST1]], [[CONST2]]
+    ; CHECK-NEXT: [[CONST3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[CONST4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[CONST3]], [[CONST4]]
+    %0:_(s1) = COPY $sgpr4_sgpr5
+    %1:_(s1) = COPY $sgpr6_sgpr7
+    %2:_(s32) = G_ZEXT %0:_(s1)
+    %3:_(s32) = G_ZEXT %1:_(s1)
+...
+
+---
+name: copy2_sgpr_32_to_s1
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    ; CHECK-LABEL: name: copy2_sgpr_32_to_s1
+    ; CHECK: liveins: $sgpr0, $sgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY $sgpr0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY $sgpr1
+    ; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY1]](s1), [[CONST1]], [[CONST2]]
+    ; CHECK-NEXT: [[CONST3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[CONST4:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY2]](s1), [[CONST3]], [[CONST4]]
+    %0:_(s1) = COPY $sgpr0
+    %1:_(s1) = COPY $sgpr1
+    %2:_(s32) = G_ZEXT %0:_(s1)
+    %3:_(s32) = G_ZEXT %1:_(s1)
+...
+
+---
+name: copy_sgpr_64_and_sgpr_32_to_s1
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr6, $sgpr4_sgpr5
+    ; CHECK-LABEL: name: copy_sgpr_64_and_sgpr_32_to_s1
+    ; CHECK: liveins: $sgpr6, $sgpr4_sgpr5
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
+    ; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY1]](s1), [[CONST1]], [[CONST2]]
+    ; CHECK-NEXT: [[CONST3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[CONST3]]
+    %0:_(s1) = COPY $sgpr4_sgpr5
+    %2:_(s32) = COPY $sgpr6
+    %7:_(s32) = G_ZEXT %0:_(s1)
+    %5:_(s32) = G_CONSTANT i32 1
+    %4:_(s32) = G_AND %2:_, %5:_
+...
+
+---
+name: copy_sgpr_64_to_s1_vgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+    ; CHECK-LABEL: name: copy_sgpr_64_to_s1_vgpr
+    ; CHECK: liveins: $sgpr4_sgpr5
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr4_sgpr5
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1)
+    %0:vgpr(s1) = COPY $sgpr4_sgpr5
+    %1:_(s32) = G_ZEXT %0:vgpr(s1)
+...
+
+---
+name: copy_sgpr_32_to_s1_vgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; CHECK-LABEL: name: copy_sgpr_32_to_s1_vgpr
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s1) = COPY $sgpr0
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[COPY]](s1)
+    %0:vgpr(s1) = COPY $sgpr0
+    %1:_(s32) = G_ZEXT %0:vgpr(s1)
+...
+
+---
+name: copy_sgpr_64_to_s1_vcc
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+    ; CHECK-LABEL: name: copy_sgpr_64_to_s1_vcc
+    ; CHECK: liveins: $sgpr4_sgpr5
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr4_sgpr5
+    ; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
+    %0:vcc(s1) = COPY $sgpr4_sgpr5
+    %1:_(s32) = G_ZEXT %0:vcc(s1)
+...
+
+---
+name: copy_sgpr_32_to_s1_vcc
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+    ; CHECK-LABEL: name: copy_sgpr_32_to_s1_vcc
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vcc(s1) = COPY $sgpr0
+    ; CHECK-NEXT: [[CONST1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[CONST2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY]](s1), [[CONST1]], [[CONST2]]
+    %0:vcc(s1) = COPY $sgpr0
+    %1:_(s32) = G_ZEXT %0:vcc(s1)
+...
+
+---
+name: copy_virt_reg_to_s1
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: copy_virt_reg_to_s1
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s1) = G_TRUNC %0
+    %2:_(s1) = COPY %1
+...
+
+---
+name: copy_virt_reg_to_s1_vgpr
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: copy_virt_reg_to_s1_vgpr
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s1) = COPY [[TRUNC]](s1)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s1) = COPY [[COPY2]](s1)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s1) = G_TRUNC %0
+    %2:vgpr(s1) = COPY %1
+    %3:_(s1) = COPY %2
+...
+
+
+---
+name: copy_virt_reg_to_s1_vcc
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: copy_virt_reg_to_s1_vcc
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vcc(s1) = COPY [[COPY2]](s1)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s1) = G_TRUNC %0
+    %2:vcc(s1) = COPY %1
+    %3:_(s1) = COPY %2
+...
+
+---
+name: copy_s1_to_sgpr_64
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: copy_s1_to_sgpr_64
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[TRUNC]](s1)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s1) = G_TRUNC %0
+    $sgpr4_sgpr5 = COPY %1
+...
+
+---
+name: copy_s1_to_sgpr_32
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: copy_s1_to_sgpr_32
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: $sgpr0 = COPY [[TRUNC]](s1)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s1) = G_TRUNC %0
+    $sgpr0 = COPY %1
+...
+



More information about the llvm-commits mailing list