[llvm] [AMDGPU] Add MachineVerifier check to detect illegal copies from vector register to SGPR (PR #105494)
Aditi Medhane via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 3 03:05:44 PDT 2024
https://github.com/AditiRM updated https://github.com/llvm/llvm-project/pull/105494
>From 28724d61d2b4c5da60bf44f46a68aa004d35ebb6 Mon Sep 17 00:00:00 2001
From: AditiRM <aditi.medhane at amd.com>
Date: Wed, 21 Aug 2024 06:36:47 +0000
Subject: [PATCH 1/5] [AMDGPU] Add MachineVerifier check to detect illegal
copies from VGPR to SGPR
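For reference, a condensed sketch of the check this patch adds (simplified from
the hunks below; the helper name, register queries, and error string are the
ones used in the patch):

  // New verifier hook in SIInstrInfo: a COPY whose source is a VGPR and whose
  // destination is an SGPR that survives past SIFixSGPRCopies is illegal, so
  // the MachineVerifier reports it as bad machine code.
  bool SIInstrInfo::verifyCopy(const MachineInstr &MI,
                               const MachineRegisterInfo &MRI,
                               StringRef &ErrInfo) const {
    const MachineOperand &Dst = MI.getOperand(0);
    const MachineOperand &Src = MI.getOperand(1);
    if (Dst.isReg() && Src.isReg() &&
        RI.isVGPR(MRI, Src.getReg()) && RI.isSGPRReg(MRI, Dst.getReg())) {
      ErrInfo = "illegal copy from VGPR to SGPR";
      return false; // verification failure
    }
    return true;
  }

verifyInstruction only calls this helper once the function has left SSA form,
which the FIXME below uses as a conservative proxy for "SIFixSGPRCopies has
already run", so the check does not fire on pre-selection MIR where such
copies are still expected.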
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 32 +++++++++++++++++--
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 3 ++
.../AMDGPU/fix-illegal-vgpr-copies.mir | 29 +++++++++++++++++
.../AMDGPU/phi-moveimm-subreg-input.mir | 30 +++++++++++++++++
.../CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir | 32 -------------------
llvm/test/CodeGen/AMDGPU/wqm.mir | 9 +++---
6 files changed, 95 insertions(+), 40 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/fix-illegal-vgpr-copies.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/phi-moveimm-subreg-input.mir
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index b6dd4905fb61bb..22572a92227b70 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4613,15 +4613,41 @@ static bool isSubRegOf(const SIRegisterInfo &TRI,
SubReg.getReg() == SuperVec.getReg();
}
+// Verify the illgal copy from VGPR to SGPR for generic opcode COPY
+bool SIInstrInfo::verifyCopy(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ StringRef &ErrInfo) const {
+ const MachineOperand &Dst = MI.getOperand(0);
+ const MachineOperand &Src = MI.getOperand(1);
+
+ if (Dst.isReg() && Src.isReg()) {
+ Register DstReg = Dst.getReg();
+ Register SrcReg = Src.getReg();
+ // This is a check for copy from an VGPR to SGPR
+ if (RI.isVGPR(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
+ ErrInfo = "illegal copy from VGPR to SGPR";
+ return false;
+ }
+ }
+ return true;
+}
+
bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const {
uint16_t Opcode = MI.getOpcode();
- if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
- return true;
-
const MachineFunction *MF = MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF->getRegInfo();
+ if (SIInstrInfo::isGenericOpcode(MI.getOpcode())) {
+ // FIXME: At this point the COPY verify is done only for non-ssa forms.
+ // Find a better property to recognize the point where instruction selection
+ // is just done.
+ if (!MRI.isSSA() && MI.isCopy())
+ return verifyCopy(MI, MRI, ErrInfo);
+
+ return true;
+ }
+
int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 1712dfe8d406cc..4caf37cd2f08e0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -178,6 +178,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
+ bool verifyCopy(const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ StringRef &ErrInfo) const;
+
protected:
/// If the specific machine instruction is a instruction that moves/copies
/// value from one register to another register return destination and source
diff --git a/llvm/test/CodeGen/AMDGPU/fix-illegal-vgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-illegal-vgpr-copies.mir
new file mode 100644
index 00000000000000..8eaab0a16e55e4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fix-illegal-vgpr-copies.mir
@@ -0,0 +1,29 @@
+# RUN: not --crash llc -march=amdgcn -mcpu=gfx1200 -run-pass=machineverifier %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+---
+name: fix-illegal-copies
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ bb.0:
+ %0:vgpr_32 = IMPLICIT_DEF
+ %0:vgpr_32 = IMPLICIT_DEF ; Break SSA format
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:sgpr_32 = IMPLICIT_DEF
+ %3:sgpr_32 = IMPLICIT_DEF
+
+ ; ERR: *** Bad machine code: illegal copy from VGPR to SGPR ***
+ ; ERR: instruction: %4:sgpr_32 = COPY %0:vgpr_32
+ %4:sgpr_32 = COPY %0:vgpr_32
+
+ ; ERR: *** Bad machine code: illegal copy from VGPR to SGPR ***
+ ; ERR: instruction: $sgpr0 = COPY %0:vgpr_32
+ $sgpr0 = COPY %0:vgpr_32
+
+ ; ERR: *** Bad machine code: illegal copy from VGPR to SGPR ***
+ ; ERR: instruction: $sgpr1 = COPY $vgpr0
+ $sgpr1 = COPY $vgpr0
+
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/phi-moveimm-subreg-input.mir b/llvm/test/CodeGen/AMDGPU/phi-moveimm-subreg-input.mir
new file mode 100644
index 00000000000000..5d58673f7bdca9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/phi-moveimm-subreg-input.mir
@@ -0,0 +1,30 @@
+# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-fix-sgpr-copies -o - %s 2>&1 | FileCheck -check-prefix=GCN %s
+
+# GCN: *** Bad machine code: illegal copy from VGPR to SGPR ***
+# GCN: instruction: undef %5.sub0:sreg_64 = COPY %0:vgpr_32
+name: phi_moveimm_subreg_input
+tracksRegLiveness: true
+body: |
+ bb.0:
+ successors: %bb.1
+ liveins: $sgpr0, $sgpr1
+
+ %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+ %4:sreg_32 = COPY $sgpr0
+ %5:sreg_32 = COPY $sgpr1
+
+ bb.1:
+ successors: %bb.2
+ undef %2.sub0:sreg_64 = S_ADD_U32 %4, %5, implicit-def $scc
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3
+ %3:sreg_64 = PHI %1, %bb.3, %2, %bb.1
+ S_BRANCH %bb.3
+
+ bb.3:
+ successors: %bb.2
+ undef %1.sub0:sreg_64 = COPY %0
+ S_BRANCH %bb.2
diff --git a/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
index f931acb8408da2..2b78f984ae775c 100644
--- a/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
+++ b/llvm/test/CodeGen/AMDGPU/phi-vgpr-input-moveimm.mir
@@ -32,38 +32,6 @@ body: |
S_BRANCH %bb.2
...
----
-# GCN-LABEL: name: phi_moveimm_subreg_input
-# GCN: %{{[0-9]+}}:sreg_64 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
-name: phi_moveimm_subreg_input
-tracksRegLiveness: true
-body: |
- bb.0:
- successors: %bb.1
- liveins: $sgpr0, $sgpr1
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-
- %4:sreg_32 = COPY $sgpr0
- %5:sreg_32 = COPY $sgpr1
-
- bb.1:
- successors: %bb.2
- undef %2.sub0:sreg_64 = S_ADD_U32 %4, %5, implicit-def $scc
- S_BRANCH %bb.2
-
- bb.2:
- successors: %bb.3
- %3:sreg_64 = PHI %1, %bb.3, %2, %bb.1
- S_BRANCH %bb.3
-
- bb.3:
- successors: %bb.2
- undef %1.sub0:sreg_64 = COPY %0
- S_BRANCH %bb.2
-...
-
-
---
# GCN-LABEL: name: phi_moveimm_bad_opcode_input
# GCN-NOT: %{{[0-9]+}}:sreg_32 = PHI %{{[0-9]+}}, %bb.3, %{{[0-9]+}}, %bb.1
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
index ef6d0780f395fd..5ff508b1e3842e 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -189,9 +189,9 @@ body: |
# Ensure that strict_wwm is not put around an EXEC copy
#CHECK-LABEL: name: copy_exec
#CHECK: %7:sreg_64 = COPY $exec
-#CHECK-NEXT: %14:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
+#CHECK-NEXT: %13:sreg_64 = ENTER_STRICT_WWM -1, implicit-def $exec, implicit-def $scc, implicit $exec
#CHECK-NEXT: %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-#CHECK-NEXT: $exec = EXIT_STRICT_WWM %14
+#CHECK-NEXT: $exec = EXIT_STRICT_WWM %13
#CHECK-NEXT: %9:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %7.sub0, 0, implicit $exec
name: copy_exec
tracksRegLiveness: true
@@ -212,10 +212,9 @@ body: |
%10:vgpr_32 = V_MBCNT_LO_U32_B32_e64 %8.sub0:sreg_64, 0, implicit $exec
%11:vgpr_32 = V_MOV_B32_dpp %9:vgpr_32, %10:vgpr_32, 312, 15, 15, 0, implicit $exec
%12:sreg_32 = V_READLANE_B32 %11:vgpr_32, 63
- early-clobber %13:sreg_32 = STRICT_WWM %9:vgpr_32, implicit $exec
+ early-clobber %13:vgpr_32 = STRICT_WWM %9:vgpr_32, implicit $exec
- %14:vgpr_32 = COPY %13
- BUFFER_STORE_DWORD_OFFSET_exact killed %14, %4, %5, 4, 0, 0, implicit $exec
+ BUFFER_STORE_DWORD_OFFSET_exact killed %13, %4, %5, 4, 0, 0, implicit $exec
S_ENDPGM 0
...
>From e5f73a030974e545bee3657065748e3fa10c083d Mon Sep 17 00:00:00 2001
From: AditiRM <Aditi.Medhane at amd.com>
Date: Tue, 27 Aug 2024 15:14:18 +0530
Subject: [PATCH 2/5] Handle review comments
Add detection of illegal AGPR-to-SGPR copies and apply the remaining review changes
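With this revision the same helper also rejects AGPR sources. Condensed from
the hunk below (the operand and register queries are the ones used in the
patch); copies from either vector register file into an SGPR are reported as
bad machine code:

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  // A stray COPY from a VGPR into an SGPR is illegal at this point ...
  if (RI.isVGPR(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
    ErrInfo = "illegal copy from VGPR to SGPR";
    return false;
  }
  // ... and so is a COPY from an AGPR into an SGPR.
  if (RI.isAGPR(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
    ErrInfo = "illegal copy from AGPR to SGPR";
    return false;
  }
  return true;

The test also moves from llvm/test/CodeGen/AMDGPU to llvm/test/MachineVerifier
so it sits with the other machine-verifier tests.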
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 24 ++++----
.../AMDGPU/fix-illegal-vgpr-copies.mir | 29 ---------
.../MachineVerifier/fix-illegal-copies.mir | 60 +++++++++++++++++++
3 files changed, 74 insertions(+), 39 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/fix-illegal-vgpr-copies.mir
create mode 100644 llvm/test/MachineVerifier/fix-illegal-copies.mir
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 22572a92227b70..4762c1283b6b4d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4613,22 +4613,27 @@ static bool isSubRegOf(const SIRegisterInfo &TRI,
SubReg.getReg() == SuperVec.getReg();
}
-// Verify the illgal copy from VGPR to SGPR for generic opcode COPY
+// Verify the illegal copy from VGPR to SGPR for generic opcode COPY
bool SIInstrInfo::verifyCopy(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
StringRef &ErrInfo) const {
const MachineOperand &Dst = MI.getOperand(0);
const MachineOperand &Src = MI.getOperand(1);
- if (Dst.isReg() && Src.isReg()) {
- Register DstReg = Dst.getReg();
- Register SrcReg = Src.getReg();
- // This is a check for copy from an VGPR to SGPR
- if (RI.isVGPR(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
- ErrInfo = "illegal copy from VGPR to SGPR";
- return false;
- }
+ Register DstReg = Dst.getReg();
+ Register SrcReg = Src.getReg();
+ // This is a check for copy from an VGPR to SGPR
+ if (RI.isVGPR(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
+ ErrInfo = "illegal copy from VGPR to SGPR";
+ return false;
}
+
+ // This is a check for copy from an AGPR to SGPR
+ if (RI.isAGPR(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
+ ErrInfo = "illegal copy from AGPR to SGPR";
+ return false;
+ }
+
return true;
}
@@ -4644,7 +4649,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
// is just done.
if (!MRI.isSSA() && MI.isCopy())
return verifyCopy(MI, MRI, ErrInfo);
-
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/fix-illegal-vgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-illegal-vgpr-copies.mir
deleted file mode 100644
index 8eaab0a16e55e4..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/fix-illegal-vgpr-copies.mir
+++ /dev/null
@@ -1,29 +0,0 @@
-# RUN: not --crash llc -march=amdgcn -mcpu=gfx1200 -run-pass=machineverifier %s 2>&1 | FileCheck -check-prefix=ERR %s
-
----
-name: fix-illegal-copies
-tracksRegLiveness: true
-machineFunctionInfo:
- isEntryFunction: true
-body: |
- bb.0:
- %0:vgpr_32 = IMPLICIT_DEF
- %0:vgpr_32 = IMPLICIT_DEF ; Break SSA format
- %1:vgpr_32 = IMPLICIT_DEF
- %2:sgpr_32 = IMPLICIT_DEF
- %3:sgpr_32 = IMPLICIT_DEF
-
- ; ERR: *** Bad machine code: illegal copy from VGPR to SGPR ***
- ; ERR: instruction: %4:sgpr_32 = COPY %0:vgpr_32
- %4:sgpr_32 = COPY %0:vgpr_32
-
- ; ERR: *** Bad machine code: illegal copy from VGPR to SGPR ***
- ; ERR: instruction: $sgpr0 = COPY %0:vgpr_32
- $sgpr0 = COPY %0:vgpr_32
-
- ; ERR: *** Bad machine code: illegal copy from VGPR to SGPR ***
- ; ERR: instruction: $sgpr1 = COPY $vgpr0
- $sgpr1 = COPY $vgpr0
-
- S_ENDPGM 0
-...
diff --git a/llvm/test/MachineVerifier/fix-illegal-copies.mir b/llvm/test/MachineVerifier/fix-illegal-copies.mir
new file mode 100644
index 00000000000000..e1737eb0d0c959
--- /dev/null
+++ b/llvm/test/MachineVerifier/fix-illegal-copies.mir
@@ -0,0 +1,60 @@
+# RUN: not --crash llc -march=amdgcn -mcpu=gfx1200 -run-pass=none -o /dev/null %s 2>&1 | FileCheck %s
+# REQUIRES: amdgpu-registered-target
+
+---
+name: fix-illegal-copies
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ bb.0:
+ %0:vgpr_32 = IMPLICIT_DEF
+ %0:vgpr_32 = IMPLICIT_DEF ; Break SSA format
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:sgpr_32 = IMPLICIT_DEF
+ %3:sgpr_32 = IMPLICIT_DEF
+ %4:agpr_32 = IMPLICIT_DEF
+ %5:agpr_32 = IMPLICIT_DEF
+
+ ; copy from virtual VGPR to virtual SGPR
+ ; CHECK: *** Bad machine code: illegal copy from VGPR to SGPR ***
+ ; CHECK: - instruction: %6:sgpr_32 = COPY %0:vgpr_32
+ %6:sgpr_32 = COPY %0:vgpr_32
+
+ ; copy from virtual VGPR to physical SGPR
+ ; CHECK: *** Bad machine code: illegal copy from VGPR to SGPR ***
+ ; CHECK: - instruction: $sgpr0 = COPY %0:vgpr_32
+ $sgpr0 = COPY %0:vgpr_32
+
+ ; copy from physical VGPR to physical SGPR
+ ; CHECK: *** Bad machine code: illegal copy from VGPR to SGPR ***
+ ; CHECK: - instruction: $sgpr1 = COPY $vgpr0
+ $sgpr1 = COPY $vgpr0
+
+ ; copy from virtual AGPR to virtual SGPR
+ ; CHECK: *** Bad machine code: illegal copy from AGPR to SGPR ***
+ ; CHECK: - instruction: %7:sgpr_32 = COPY %4:agpr_32
+ %7:sgpr_32 = COPY %4:agpr_32
+
+ ; copy from virtual AGPR to physical SGPR
+ ; CHECK: *** Bad machine code: illegal copy from AGPR to SGPR ***
+ ; CHECK: - instruction: $sgpr2 = COPY %4:agpr_32
+ $sgpr2 = COPY %4:agpr_32
+
+ ; copy from physical AGPR to physical SGPR
+ ; CHECK: *** Bad machine code: illegal copy from AGPR to SGPR ***
+ ; CHECK: - instruction: $sgpr3 = COPY $agpr0
+ $sgpr3 = COPY $agpr0
+
+ ; copy from tuple of physical VGPRs to tuple of physical SGPRs
+ ; CHECK: *** Bad machine code: illegal copy from VGPR to SGPR ***
+ ; CHECK: - instruction: $sgpr4_sgpr5 = COPY $vgpr0_vgpr1
+ $sgpr4_sgpr5 = COPY $vgpr0_vgpr1
+
+ ; copy from tuple of physical AGPRs to tuple of physical SGPRs
+ ; CHECK: *** Bad machine code: illegal copy from AGPR to SGPR ***
+ ; CHECK: - instruction: $sgpr6_sgpr7 = COPY $agpr0_agpr1
+ $sgpr6_sgpr7 = COPY $agpr0_agpr1
+
+ S_ENDPGM 0
+...
>From 9c89b57102e05ed82b76e818349be555d2d6ba60 Mon Sep 17 00:00:00 2001
From: AditiRM <Aditi.Medhane at amd.com>
Date: Wed, 28 Aug 2024 10:27:20 +0530
Subject: [PATCH 3/5] Handle review comments
Generalize the illegal-copy check to vector registers (both VGPR and AGPR), remove the subreg-input test case, and update the test's error checks accordingly
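The two conditions collapse into a single query, since SIRegisterInfo's
isVectorRegister is true for both VGPR and AGPR sources. Condensed from the
hunk below:

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  // One check now catches every vector-register-to-SGPR copy, VGPR or AGPR.
  if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
    ErrInfo = "illegal copy from vector register to SGPR";
    return false;
  }
  return true;

The single error string is what the updated test now checks for in every case.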
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 23 +++++---------
.../AMDGPU/phi-moveimm-subreg-input.mir | 30 -------------------
.../MachineVerifier/fix-illegal-copies.mir | 16 +++++-----
3 files changed, 15 insertions(+), 54 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/phi-moveimm-subreg-input.mir
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4762c1283b6b4d..9f8b856643ccd2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4613,27 +4613,17 @@ static bool isSubRegOf(const SIRegisterInfo &TRI,
SubReg.getReg() == SuperVec.getReg();
}
-// Verify the illegal copy from VGPR to SGPR for generic opcode COPY
+// Verify the illegal copy from vector register to SGPR for generic opcode COPY
bool SIInstrInfo::verifyCopy(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
StringRef &ErrInfo) const {
- const MachineOperand &Dst = MI.getOperand(0);
- const MachineOperand &Src = MI.getOperand(1);
-
- Register DstReg = Dst.getReg();
- Register SrcReg = Src.getReg();
- // This is a check for copy from an VGPR to SGPR
- if (RI.isVGPR(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
- ErrInfo = "illegal copy from VGPR to SGPR";
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ // This is a check for copy from vector register to SGPR
+ if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
+ ErrInfo = "illegal copy from vector register to SGPR";
return false;
}
-
- // This is a check for copy from an AGPR to SGPR
- if (RI.isAGPR(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
- ErrInfo = "illegal copy from AGPR to SGPR";
- return false;
- }
-
return true;
}
@@ -4647,6 +4637,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
// FIXME: At this point the COPY verify is done only for non-ssa forms.
// Find a better property to recognize the point where instruction selection
// is just done.
+ // We can only enforce this check after SIFixSGPRCopies pass.
if (!MRI.isSSA() && MI.isCopy())
return verifyCopy(MI, MRI, ErrInfo);
return true;
diff --git a/llvm/test/CodeGen/AMDGPU/phi-moveimm-subreg-input.mir b/llvm/test/CodeGen/AMDGPU/phi-moveimm-subreg-input.mir
deleted file mode 100644
index 5d58673f7bdca9..00000000000000
--- a/llvm/test/CodeGen/AMDGPU/phi-moveimm-subreg-input.mir
+++ /dev/null
@@ -1,30 +0,0 @@
-# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-fix-sgpr-copies -o - %s 2>&1 | FileCheck -check-prefix=GCN %s
-
-# GCN: *** Bad machine code: illegal copy from VGPR to SGPR ***
-# GCN: instruction: undef %5.sub0:sreg_64 = COPY %0:vgpr_32
-name: phi_moveimm_subreg_input
-tracksRegLiveness: true
-body: |
- bb.0:
- successors: %bb.1
- liveins: $sgpr0, $sgpr1
-
- %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-
- %4:sreg_32 = COPY $sgpr0
- %5:sreg_32 = COPY $sgpr1
-
- bb.1:
- successors: %bb.2
- undef %2.sub0:sreg_64 = S_ADD_U32 %4, %5, implicit-def $scc
- S_BRANCH %bb.2
-
- bb.2:
- successors: %bb.3
- %3:sreg_64 = PHI %1, %bb.3, %2, %bb.1
- S_BRANCH %bb.3
-
- bb.3:
- successors: %bb.2
- undef %1.sub0:sreg_64 = COPY %0
- S_BRANCH %bb.2
diff --git a/llvm/test/MachineVerifier/fix-illegal-copies.mir b/llvm/test/MachineVerifier/fix-illegal-copies.mir
index e1737eb0d0c959..c3acebf7a694eb 100644
--- a/llvm/test/MachineVerifier/fix-illegal-copies.mir
+++ b/llvm/test/MachineVerifier/fix-illegal-copies.mir
@@ -17,42 +17,42 @@ body: |
%5:agpr_32 = IMPLICIT_DEF
; copy from virtual VGPR to virtual SGPR
- ; CHECK: *** Bad machine code: illegal copy from VGPR to SGPR ***
+ ; CHECK: *** Bad machine code: illegal copy from vector register to SGPR ***
; CHECK: - instruction: %6:sgpr_32 = COPY %0:vgpr_32
%6:sgpr_32 = COPY %0:vgpr_32
; copy from virtual VGPR to physical SGPR
- ; CHECK: *** Bad machine code: illegal copy from VGPR to SGPR ***
+ ; CHECK: *** Bad machine code: illegal copy from vector register to SGPR ***
; CHECK: - instruction: $sgpr0 = COPY %0:vgpr_32
$sgpr0 = COPY %0:vgpr_32
; copy from physical VGPR to physical SGPR
- ; CHECK: *** Bad machine code: illegal copy from VGPR to SGPR ***
+ ; CHECK: *** Bad machine code: illegal copy from vector register to SGPR ***
; CHECK: - instruction: $sgpr1 = COPY $vgpr0
$sgpr1 = COPY $vgpr0
; copy from virtual AGPR to virtual SGPR
- ; CHECK: *** Bad machine code: illegal copy from AGPR to SGPR ***
+ ; CHECK: *** Bad machine code: illegal copy from vector register to SGPR ***
; CHECK: - instruction: %7:sgpr_32 = COPY %4:agpr_32
%7:sgpr_32 = COPY %4:agpr_32
; copy from virtual AGPR to physical SGPR
- ; CHECK: *** Bad machine code: illegal copy from AGPR to SGPR ***
+ ; CHECK: *** Bad machine code: illegal copy from vector register to SGPR ***
; CHECK: - instruction: $sgpr2 = COPY %4:agpr_32
$sgpr2 = COPY %4:agpr_32
; copy from physical AGPR to physical SGPR
- ; CHECK: *** Bad machine code: illegal copy from AGPR to SGPR ***
+ ; CHECK: *** Bad machine code: illegal copy from vector register to SGPR ***
; CHECK: - instruction: $sgpr3 = COPY $agpr0
$sgpr3 = COPY $agpr0
; copy from tuple of physical VGPRs to tuple of physical SGPRs
- ; CHECK: *** Bad machine code: illegal copy from VGPR to SGPR ***
+ ; CHECK: *** Bad machine code: illegal copy from vector register to SGPR ***
; CHECK: - instruction: $sgpr4_sgpr5 = COPY $vgpr0_vgpr1
$sgpr4_sgpr5 = COPY $vgpr0_vgpr1
; copy from tuple of physical AGPRs to tuple of physical SGPRs
- ; CHECK: *** Bad machine code: illegal copy from AGPR to SGPR ***
+ ; CHECK: *** Bad machine code: illegal copy from vector register to SGPR ***
; CHECK: - instruction: $sgpr6_sgpr7 = COPY $agpr0_agpr1
$sgpr6_sgpr7 = COPY $agpr0_agpr1
>From 320bc25058da95080eea12799db58752d60ae7f6 Mon Sep 17 00:00:00 2001
From: AditiRM <Aditi.Medhane at amd.com>
Date: Tue, 3 Sep 2024 11:17:25 +0530
Subject: [PATCH 4/5] Handle review comments
Removed the redundant isGenericOpcode check; the isCopy check is now independent of generic opcodes and applies to both the GlobalISel and SelectionDAG pipelines
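Condensed, the entry of verifyInstruction now reads as below (see the hunk that
follows): the COPY check runs before the generic-opcode early return, so copies
produced by either selector are verified the same way.

  bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
                                      StringRef &ErrInfo) const {
    uint16_t Opcode = MI.getOpcode();
    const MachineFunction *MF = MI.getParent()->getParent();
    const MachineRegisterInfo &MRI = MF->getRegInfo();

    // Verify COPYs first; only enforceable once out of SSA form (i.e. after
    // SIFixSGPRCopies), per the FIXME carried over from the earlier patches.
    if (!MRI.isSSA() && MI.isCopy())
      return verifyCopy(MI, MRI, ErrInfo);

    // Generic (pre-selection) opcodes are otherwise not checked here.
    if (SIInstrInfo::isGenericOpcode(Opcode))
      return true;
    ...
  }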
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 63b5d643cc58c6..89c10158e608fd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4622,13 +4622,13 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
const MachineFunction *MF = MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF->getRegInfo();
+ // FIXME: At this point the COPY verify is done only for non-ssa forms.
+ // Find a better property to recognize the point where instruction selection is just done.
+ // We can only enforce this check after SIFixSGPRCopies pass.
+ if (!MRI.isSSA() && MI.isCopy())
+ return verifyCopy(MI, MRI, ErrInfo);
+
if (SIInstrInfo::isGenericOpcode(MI.getOpcode())) {
- // FIXME: At this point the COPY verify is done only for non-ssa forms.
- // Find a better property to recognize the point where instruction selection
- // is just done.
- // We can only enforce this check after SIFixSGPRCopies pass.
- if (!MRI.isSSA() && MI.isCopy())
- return verifyCopy(MI, MRI, ErrInfo);
return true;
}
>From c53b6835e88bebe70553eec15611cd1761ad0497 Mon Sep 17 00:00:00 2001
From: AditiRM <Aditi.Medhane at amd.com>
Date: Tue, 3 Sep 2024 15:30:20 +0530
Subject: [PATCH 5/5] Handle review comments
Moved the test case under MachineVerifier/AMDGPU and renamed it accordingly; rechecked formatting
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 1058 ++++++++---------
.../fix-illegal-vector-copies.mir} | 3 +-
2 files changed, 518 insertions(+), 543 deletions(-)
rename llvm/test/MachineVerifier/{fix-illegal-copies.mir => AMDGPU/fix-illegal-vector-copies.mir} (97%)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 89c10158e608fd..8f2a9b2ee9f6a5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -51,18 +51,18 @@ namespace llvm::AMDGPU {
// code. This is only for making it possible to write reasonably small tests for
// long branches.
static cl::opt<unsigned>
-BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
- cl::desc("Restrict range of branch instructions (DEBUG)"));
+ BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
+ cl::desc("Restrict range of branch instructions (DEBUG)"));
static cl::opt<bool> Fix16BitCopies(
- "amdgpu-fix-16-bit-physreg-copies",
- cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
- cl::init(true),
- cl::ReallyHidden);
+ "amdgpu-fix-16-bit-physreg-copies",
+ cl::desc(
+ "Fix copies between 32 and 16 bit registers by extending to 32 bit"),
+ cl::init(true), cl::ReallyHidden);
SIInstrInfo::SIInstrInfo(const GCNSubtarget &ST)
- : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
- RI(ST), ST(ST) {
+ : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
+ RI(ST), ST(ST) {
SchedModel.init(&ST);
}
@@ -79,7 +79,7 @@ static unsigned getNumOperandsNoGlue(SDNode *Node) {
/// Returns true if both nodes have the same value for the given
/// operand \p Op, or if both nodes do not have this operand.
-static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
+static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, unsigned OpName) {
unsigned Opc0 = N0->getMachineOpcode();
unsigned Opc1 = N1->getMachineOpcode();
@@ -89,9 +89,7 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
if (Op0Idx == -1 && Op1Idx == -1)
return true;
-
- if ((Op0Idx == -1 && Op1Idx != -1) ||
- (Op1Idx == -1 && Op0Idx != -1))
+ if ((Op0Idx == -1 && Op1Idx != -1) || (Op1Idx == -1 && Op0Idx != -1))
return false;
// getNamedOperandIdx returns the index for the MachineInstr's operands,
@@ -614,14 +612,13 @@ static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
C.diagnose(IllegalCopy);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
/// Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908. It is not
/// possible to have a direct copy in these cases on GFX908, so an intermediate
/// VGPR copy is required.
-static void indirectCopyToAGPR(const SIInstrInfo &TII,
- MachineBasicBlock &MBB,
+static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc,
@@ -646,7 +643,7 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII,
// reuse previous accvgpr_writes. Otherwise, we may incorrectly pick up
// an accvgpr_write used for this same copy due to implicit-defs
if (!RegsOverlap) {
- for (auto Def = MI, E = MBB.begin(); Def != E; ) {
+ for (auto Def = MI, E = MBB.begin(); Def != E;) {
--Def;
if (!Def->modifiesRegister(SrcReg, &RI))
@@ -674,14 +671,15 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII,
}
MachineInstrBuilder Builder =
- BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
- .add(DefOp);
+ BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
+ DestReg)
+ .add(DefOp);
if (ImpDefSuperReg)
Builder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit);
if (ImpUseSuperReg) {
Builder.addReg(ImpUseSuperReg,
- getKillRegState(KillSrc) | RegState::Implicit);
+ getKillRegState(KillSrc) | RegState::Implicit);
}
return;
@@ -693,8 +691,8 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII,
// Ideally we want to have three registers for a long reg_sequence copy
// to hide 2 waitstates between v_mov_b32 and accvgpr_write.
- unsigned MaxVGPRs = RI.getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
- *MBB.getParent());
+ unsigned MaxVGPRs =
+ RI.getRegPressureLimit(&AMDGPU::VGPR_32RegClass, *MBB.getParent());
// Registers in the sequence are allocated contiguously so we can just
// use register number to pick one of three round-robin temps.
@@ -724,16 +722,17 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII,
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
}
- MachineInstrBuilder UseBuilder = BuildMI(MBB, MI, DL, TII.get(TmpCopyOp), Tmp)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ MachineInstrBuilder UseBuilder =
+ BuildMI(MBB, MI, DL, TII.get(TmpCopyOp), Tmp)
+ .addReg(SrcReg, getKillRegState(KillSrc));
if (ImpUseSuperReg) {
UseBuilder.addReg(ImpUseSuperReg,
getKillRegState(KillSrc) | RegState::Implicit);
}
- MachineInstrBuilder DefBuilder
- = BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
- .addReg(Tmp, RegState::Kill);
+ MachineInstrBuilder DefBuilder =
+ BuildMI(MBB, MI, DL, TII.get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
+ .addReg(Tmp, RegState::Kill);
if (ImpDefSuperReg)
DefBuilder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit);
@@ -829,15 +828,15 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
AMDGPU::SReg_32RegClass.contains(SrcReg) ||
AMDGPU::AGPR_32RegClass.contains(SrcReg));
- unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
- AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
+ unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg)
+ ? AMDGPU::V_ACCVGPR_READ_B32_e64
+ : AMDGPU::V_MOV_B32_e32;
BuildMI(MBB, MI, DL, get(Opc), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
- if (RC == &AMDGPU::SReg_32_XM0RegClass ||
- RC == &AMDGPU::SReg_32RegClass) {
+ if (RC == &AMDGPU::SReg_32_XM0RegClass || RC == &AMDGPU::SReg_32RegClass) {
if (SrcReg == AMDGPU::SCC) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
.addImm(1)
@@ -848,13 +847,13 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (DestReg == AMDGPU::VCC_LO) {
if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), AMDGPU::VCC_LO)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
} else {
// FIXME: Hack until VReg_1 removed.
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
- .addImm(0)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
return;
@@ -866,7 +865,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
@@ -881,13 +880,13 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (DestReg == AMDGPU::VCC) {
if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
} else {
// FIXME: Hack until VReg_1 removed.
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
- .addImm(0)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addImm(0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
return;
@@ -899,7 +898,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
@@ -928,13 +927,13 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
(ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {
BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_MOV_B32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
@@ -942,7 +941,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// every AGPR spill.
RegScavenger RS;
const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
- indirectCopyToAGPR(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RS, Overlap);
+ indirectCopyToAGPR(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RS,
+ Overlap);
return;
}
@@ -967,7 +967,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), NewDestReg)
- .addReg(NewSrcReg, getKillRegState(KillSrc));
+ .addReg(NewSrcReg, getKillRegState(KillSrc));
return;
}
@@ -1007,20 +1007,20 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), NewDestReg)
- .addReg(NewSrcReg, getKillRegState(KillSrc));
+ .addReg(NewSrcReg, getKillRegState(KillSrc));
return;
}
auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), NewDestReg)
- .addImm(0) // src0_modifiers
- .addReg(NewSrcReg)
- .addImm(0) // clamp
- .addImm(DstLow ? AMDGPU::SDWA::SdwaSel::WORD_0
- : AMDGPU::SDWA::SdwaSel::WORD_1)
- .addImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE)
- .addImm(SrcLow ? AMDGPU::SDWA::SdwaSel::WORD_0
- : AMDGPU::SDWA::SdwaSel::WORD_1)
- .addReg(NewDestReg, RegState::Implicit | RegState::Undef);
+ .addImm(0) // src0_modifiers
+ .addReg(NewSrcReg)
+ .addImm(0) // clamp
+ .addImm(DstLow ? AMDGPU::SDWA::SdwaSel::WORD_0
+ : AMDGPU::SDWA::SdwaSel::WORD_1)
+ .addImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE)
+ .addImm(SrcLow ? AMDGPU::SDWA::SdwaSel::WORD_0
+ : AMDGPU::SDWA::SdwaSel::WORD_1)
+ .addReg(NewDestReg, RegState::Implicit | RegState::Undef);
// First implicit operand is $exec.
MIB->tieOperands(0, MIB->getNumOperands() - 1);
return;
@@ -1029,21 +1029,21 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
if (ST.hasMovB64()) {
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_e32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (ST.hasPkMovB32()) {
BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestReg)
- .addImm(SISrcMods::OP_SEL_1)
- .addReg(SrcReg)
- .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1)
- .addReg(SrcReg)
- .addImm(0) // op_sel_lo
- .addImm(0) // op_sel_hi
- .addImm(0) // neg_lo
- .addImm(0) // neg_hi
- .addImm(0) // clamp
- .addReg(SrcReg, getKillRegState(KillSrc) | RegState::Implicit);
+ .addImm(SISrcMods::OP_SEL_1)
+ .addReg(SrcReg)
+ .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1)
+ .addReg(SrcReg)
+ .addImm(0) // op_sel_lo
+ .addImm(0) // op_sel_hi
+ .addImm(0) // neg_lo
+ .addImm(0) // neg_hi
+ .addImm(0) // clamp
+ .addReg(SrcReg, getKillRegState(KillSrc) | RegState::Implicit);
return;
}
}
@@ -1173,27 +1173,23 @@ void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB,
RegClass == &AMDGPU::SGPR_32RegClass ||
RegClass == &AMDGPU::SReg_32_XM0RegClass ||
RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {
- BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
- .addImm(Value);
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg).addImm(Value);
return;
}
if (RegClass == &AMDGPU::SReg_64RegClass ||
RegClass == &AMDGPU::SGPR_64RegClass ||
RegClass == &AMDGPU::SReg_64_XEXECRegClass) {
- BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
- .addImm(Value);
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg).addImm(Value);
return;
}
if (RegClass == &AMDGPU::VGPR_32RegClass) {
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
- .addImm(Value);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg).addImm(Value);
return;
}
if (RegClass->hasSuperClassEq(&AMDGPU::VReg_64RegClass)) {
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg)
- .addImm(Value);
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO), DestReg).addImm(Value);
return;
}
@@ -1201,7 +1197,7 @@ void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB,
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
if (RI.isSGPRClass(RegClass)) {
if (RI.getRegSizeInBits(*RegClass) > 32) {
- Opcode = AMDGPU::S_MOV_B64;
+ Opcode = AMDGPU::S_MOV_B64;
EltSize = 8;
} else {
Opcode = AMDGPU::S_MOV_B32;
@@ -1213,8 +1209,8 @@ void SIInstrInfo::materializeImmediate(MachineBasicBlock &MBB,
for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
int64_t IdxValue = Idx == 0 ? Value : 0;
- MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
- get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
+ MachineInstrBuilder Builder = BuildMI(
+ MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
Builder.addImm(IdxValue);
}
}
@@ -1232,57 +1228,59 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
Register FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *BoolXExecRC =
- RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+ RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
"Not a VGPR32 reg");
if (Cond.size() == 1) {
Register SReg = MRI.createVirtualRegister(BoolXExecRC);
- BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
- .add(Cond[0]);
+ BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg).add(Cond[0]);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
- .addImm(0)
- .addReg(FalseReg)
- .addImm(0)
- .addReg(TrueReg)
- .addReg(SReg);
+ .addImm(0)
+ .addReg(FalseReg)
+ .addImm(0)
+ .addReg(TrueReg)
+ .addReg(SReg);
} else if (Cond.size() == 2) {
assert(Cond[0].isImm() && "Cond[0] is not an immediate");
switch (Cond[0].getImm()) {
case SIInstrInfo::SCC_TRUE: {
Register SReg = MRI.createVirtualRegister(BoolXExecRC);
- BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
- : AMDGPU::S_CSELECT_B64), SReg)
- .addImm(1)
- .addImm(0);
+ BuildMI(
+ MBB, I, DL,
+ get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64),
+ SReg)
+ .addImm(1)
+ .addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
- .addImm(0)
- .addReg(FalseReg)
- .addImm(0)
- .addReg(TrueReg)
- .addReg(SReg);
+ .addImm(0)
+ .addReg(FalseReg)
+ .addImm(0)
+ .addReg(TrueReg)
+ .addReg(SReg);
break;
}
case SIInstrInfo::SCC_FALSE: {
Register SReg = MRI.createVirtualRegister(BoolXExecRC);
- BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
- : AMDGPU::S_CSELECT_B64), SReg)
- .addImm(0)
- .addImm(1);
+ BuildMI(
+ MBB, I, DL,
+ get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64),
+ SReg)
+ .addImm(0)
+ .addImm(1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
- .addImm(0)
- .addReg(FalseReg)
- .addImm(0)
- .addReg(TrueReg)
- .addReg(SReg);
+ .addImm(0)
+ .addReg(FalseReg)
+ .addImm(0)
+ .addReg(TrueReg)
+ .addReg(SReg);
break;
}
case SIInstrInfo::VCCNZ: {
MachineOperand RegOp = Cond[1];
RegOp.setImplicit(false);
Register SReg = MRI.createVirtualRegister(BoolXExecRC);
- BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
- .add(RegOp);
+ BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg).add(RegOp);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
.addReg(FalseReg)
@@ -1295,8 +1293,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
MachineOperand RegOp = Cond[1];
RegOp.setImplicit(false);
Register SReg = MRI.createVirtualRegister(BoolXExecRC);
- BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
- .add(RegOp);
+ BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg).add(RegOp);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
.addReg(TrueReg)
@@ -1308,37 +1305,45 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
case SIInstrInfo::EXECNZ: {
Register SReg = MRI.createVirtualRegister(BoolXExecRC);
Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
- BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
- : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
- .addImm(0);
- BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
- : AMDGPU::S_CSELECT_B64), SReg)
- .addImm(1)
- .addImm(0);
+ BuildMI(MBB, I, DL,
+ get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
+ : AMDGPU::S_OR_SAVEEXEC_B64),
+ SReg2)
+ .addImm(0);
+ BuildMI(
+ MBB, I, DL,
+ get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64),
+ SReg)
+ .addImm(1)
+ .addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
- .addImm(0)
- .addReg(FalseReg)
- .addImm(0)
- .addReg(TrueReg)
- .addReg(SReg);
+ .addImm(0)
+ .addReg(FalseReg)
+ .addImm(0)
+ .addReg(TrueReg)
+ .addReg(SReg);
break;
}
case SIInstrInfo::EXECZ: {
Register SReg = MRI.createVirtualRegister(BoolXExecRC);
Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
- BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
- : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
- .addImm(0);
- BuildMI(MBB, I, DL, get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
- : AMDGPU::S_CSELECT_B64), SReg)
- .addImm(0)
- .addImm(1);
+ BuildMI(MBB, I, DL,
+ get(ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32
+ : AMDGPU::S_OR_SAVEEXEC_B64),
+ SReg2)
+ .addImm(0);
+ BuildMI(
+ MBB, I, DL,
+ get(ST.isWave32() ? AMDGPU::S_CSELECT_B32 : AMDGPU::S_CSELECT_B64),
+ SReg)
+ .addImm(0)
+ .addImm(1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
- .addImm(0)
- .addReg(FalseReg)
- .addImm(0)
- .addReg(TrueReg)
- .addReg(SReg);
+ .addImm(0)
+ .addReg(FalseReg)
+ .addImm(0)
+ .addReg(TrueReg)
+ .addReg(SReg);
llvm_unreachable("Unhandled branch predicate EXECZ");
break;
}
@@ -1352,26 +1357,26 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB,
Register SIInstrInfo::insertEQ(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL,
- Register SrcReg, int Value) const {
+ const DebugLoc &DL, Register SrcReg,
+ int Value) const {
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_EQ_I32_e64), Reg)
- .addImm(Value)
- .addReg(SrcReg);
+ .addImm(Value)
+ .addReg(SrcReg);
return Reg;
}
Register SIInstrInfo::insertNE(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &DL,
- Register SrcReg, int Value) const {
+ const DebugLoc &DL, Register SrcReg,
+ int Value) const {
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
BuildMI(*MBB, I, DL, get(AMDGPU::V_CMP_NE_I32_e64), Reg)
- .addImm(Value)
- .addReg(SrcReg);
+ .addImm(Value)
+ .addReg(SrcReg);
return Reg;
}
@@ -1724,8 +1729,8 @@ void SIInstrInfo::storeRegToStackSlot(
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
const DebugLoc &DL = MBB.findDebugLoc(MI);
- MachinePointerInfo PtrInfo
- = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(*MF, FrameIndex);
MachineMemOperand *MMO = MF->getMachineMemOperand(
PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FrameIndex),
FrameInfo.getObjectAlign(FrameIndex));
@@ -1749,10 +1754,10 @@ void SIInstrInfo::storeRegToStackSlot(
}
BuildMI(MBB, MI, DL, OpDesc)
- .addReg(SrcReg, getKillRegState(isKill)) // data
- .addFrameIndex(FrameIndex) // addr
- .addMemOperand(MMO)
- .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
+ .addReg(SrcReg, getKillRegState(isKill)) // data
+ .addFrameIndex(FrameIndex) // addr
+ .addMemOperand(MMO)
+ .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
if (RI.spillSGPRToVGPR())
FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
@@ -1764,11 +1769,11 @@ void SIInstrInfo::storeRegToStackSlot(
MFI->setHasSpilledVGPRs();
BuildMI(MBB, MI, DL, get(Opcode))
- .addReg(SrcReg, getKillRegState(isKill)) // data
- .addFrameIndex(FrameIndex) // addr
- .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
- .addImm(0) // offset
- .addMemOperand(MMO);
+ .addReg(SrcReg, getKillRegState(isKill)) // data
+ .addFrameIndex(FrameIndex) // addr
+ .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
+ .addImm(0) // offset
+ .addMemOperand(MMO);
}
static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
@@ -1952,8 +1957,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const DebugLoc &DL = MBB.findDebugLoc(MI);
unsigned SpillSize = TRI->getSpillSize(*RC);
- MachinePointerInfo PtrInfo
- = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(*MF, FrameIndex);
MachineMemOperand *MMO = MF->getMachineMemOperand(
PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FrameIndex),
@@ -1976,9 +1981,9 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (RI.spillSGPRToVGPR())
FrameInfo.setStackID(FrameIndex, TargetStackID::SGPRSpill);
BuildMI(MBB, MI, DL, OpDesc, DestReg)
- .addFrameIndex(FrameIndex) // addr
- .addMemOperand(MMO)
- .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
+ .addFrameIndex(FrameIndex) // addr
+ .addMemOperand(MMO)
+ .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);
return;
}
@@ -2093,8 +2098,8 @@ unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
case AMDGPU::S_NOP:
return MI.getOperand(0).getImm() + 1;
- // SI_RETURN_TO_EPILOG is a fallthrough to code outside of the function. The
- // hazard, even if one exist, won't really be visible. Should we handle it?
+ // SI_RETURN_TO_EPILOG is a fallthrough to code outside of the function. The
+ // hazard, even if one exist, won't really be visible. Should we handle it?
}
}
@@ -2103,7 +2108,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MBB.findDebugLoc(MI);
switch (MI.getOpcode()) {
- default: return TargetInstrInfo::expandPostRAPseudo(MI);
+ default:
+ return TargetInstrInfo::expandPostRAPseudo(MI);
case AMDGPU::S_MOV_B64_term:
// This is only a terminator to get the correct spill code placement during
// register allocation.
@@ -2202,44 +2208,44 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
APInt Hi(32, Imm.getHiBits(32).getZExtValue());
if (ST.hasPkMovB32() && Lo == Hi && isInlineConstant(Lo)) {
BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst)
- .addImm(SISrcMods::OP_SEL_1)
- .addImm(Lo.getSExtValue())
- .addImm(SISrcMods::OP_SEL_1)
- .addImm(Lo.getSExtValue())
- .addImm(0) // op_sel_lo
- .addImm(0) // op_sel_hi
- .addImm(0) // neg_lo
- .addImm(0) // neg_hi
- .addImm(0); // clamp
+ .addImm(SISrcMods::OP_SEL_1)
+ .addImm(Lo.getSExtValue())
+ .addImm(SISrcMods::OP_SEL_1)
+ .addImm(Lo.getSExtValue())
+ .addImm(0) // op_sel_lo
+ .addImm(0) // op_sel_hi
+ .addImm(0) // neg_lo
+ .addImm(0) // neg_hi
+ .addImm(0); // clamp
} else {
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
- .addImm(Lo.getSExtValue())
- .addReg(Dst, RegState::Implicit | RegState::Define);
+ .addImm(Lo.getSExtValue())
+ .addReg(Dst, RegState::Implicit | RegState::Define);
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
- .addImm(Hi.getSExtValue())
- .addReg(Dst, RegState::Implicit | RegState::Define);
+ .addImm(Hi.getSExtValue())
+ .addReg(Dst, RegState::Implicit | RegState::Define);
}
} else {
assert(SrcOp.isReg());
if (ST.hasPkMovB32() &&
!RI.isAGPR(MBB.getParent()->getRegInfo(), SrcOp.getReg())) {
BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst)
- .addImm(SISrcMods::OP_SEL_1) // src0_mod
- .addReg(SrcOp.getReg())
- .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) // src1_mod
- .addReg(SrcOp.getReg())
- .addImm(0) // op_sel_lo
- .addImm(0) // op_sel_hi
- .addImm(0) // neg_lo
- .addImm(0) // neg_hi
- .addImm(0); // clamp
+ .addImm(SISrcMods::OP_SEL_1) // src0_mod
+ .addReg(SrcOp.getReg())
+ .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) // src1_mod
+ .addReg(SrcOp.getReg())
+ .addImm(0) // op_sel_lo
+ .addImm(0) // op_sel_hi
+ .addImm(0) // neg_lo
+ .addImm(0) // neg_hi
+ .addImm(0); // clamp
} else {
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
- .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
- .addReg(Dst, RegState::Implicit | RegState::Define);
+ .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0))
+ .addReg(Dst, RegState::Implicit | RegState::Define);
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
- .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
- .addReg(Dst, RegState::Implicit | RegState::Define);
+ .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
+ .addReg(Dst, RegState::Implicit | RegState::Define);
}
}
MI.eraseFromParent();
@@ -2265,11 +2271,11 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
APInt Lo(32, Imm.getLoBits(32).getZExtValue());
APInt Hi(32, Imm.getHiBits(32).getZExtValue());
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DstLo)
- .addImm(Lo.getSExtValue())
- .addReg(Dst, RegState::Implicit | RegState::Define);
+ .addImm(Lo.getSExtValue())
+ .addReg(Dst, RegState::Implicit | RegState::Define);
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DstHi)
- .addImm(Hi.getSExtValue())
- .addReg(Dst, RegState::Implicit | RegState::Define);
+ .addImm(Hi.getSExtValue())
+ .addReg(Dst, RegState::Implicit | RegState::Define);
MI.eraseFromParent();
break;
}
@@ -2283,9 +2289,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec);
FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
- .add(MI.getOperand(2));
- BuildMI(MBB, MI, DL, get(NotOpc), Exec)
- .addReg(Exec);
+ .add(MI.getOperand(2));
+ BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec);
MI.eraseFromParent();
break;
}
@@ -2302,8 +2307,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.getOperand(0).getReg())
.add(MI.getOperand(2));
expandPostRAPseudo(*Copy);
- BuildMI(MBB, MI, DL, get(NotOpc), Exec)
- .addReg(Exec);
+ BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec);
MI.eraseFromParent();
break;
}
@@ -2353,11 +2357,12 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
assert(VecReg == MI.getOperand(1).getReg());
MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, OpDesc)
- .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
- .add(MI.getOperand(2))
- .addReg(VecReg, RegState::ImplicitDefine)
- .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0));
+ BuildMI(MBB, MI, DL, OpDesc)
+ .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
+ .add(MI.getOperand(2))
+ .addReg(VecReg, RegState::ImplicitDefine)
+ .addReg(VecReg,
+ RegState::Implicit | (IsUndef ? RegState::Undef : 0));
const int ImpDefIdx =
OpDesc.getNumOperands() + OpDesc.implicit_uses().size();
@@ -2504,8 +2509,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// This only gets its own opcode so that SIPreAllocateWWMRegs can tell when
// STRICT_WQM is entered.
const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
- const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+ const unsigned WQMOp =
+ ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
+ const unsigned MovOp =
+ ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, MI, DL, get(MovOp), MI.getOperand(0).getReg()).addReg(Exec);
BuildMI(MBB, MI, DL, get(WQMOp), Exec).addReg(Exec);
@@ -2644,13 +2651,13 @@ void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB,
TargetInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, RI);
}
-std::pair<MachineInstr*, MachineInstr*>
+std::pair<MachineInstr *, MachineInstr *>
SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
- assert (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
+ assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
if (ST.hasMovB64() &&
AMDGPU::isLegalDPALU_DPPControl(
- getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) {
+ getNamedOperand(MI, AMDGPU::OpName::dpp_ctrl)->getImm())) {
MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
return std::pair(&MI, nullptr);
}
@@ -2663,7 +2670,7 @@ SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
unsigned Part = 0;
MachineInstr *Split[2];
- for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
+ for (auto Sub : {AMDGPU::sub0, AMDGPU::sub1}) {
auto MovDPP = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_dpp));
if (Dst.isPhysical()) {
MovDPP.addDef(RI.getSubReg(Dst, Sub));
@@ -2699,10 +2706,10 @@ SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
if (Dst.isVirtual())
BuildMI(MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), Dst)
- .addReg(Split[0]->getOperand(0).getReg())
- .addImm(AMDGPU::sub0)
- .addReg(Split[1]->getOperand(0).getReg())
- .addImm(AMDGPU::sub1);
+ .addReg(Split[0]->getOperand(0).getReg())
+ .addImm(AMDGPU::sub0)
+ .addReg(Split[1]->getOperand(0).getReg())
+ .addImm(AMDGPU::sub1);
MI.eraseFromParent();
return std::pair(Split[0], Split[1]);
@@ -2716,10 +2723,8 @@ SIInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
return std::nullopt;
}
-bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
- MachineOperand &Src0,
- unsigned Src0OpName,
- MachineOperand &Src1,
+bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0,
+ unsigned Src0OpName, MachineOperand &Src1,
unsigned Src1OpName) const {
MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName);
if (!Src0Mods)
@@ -2760,7 +2765,8 @@ static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
// Make sure we don't reinterpret a subreg index in the target flags.
RegOp.setTargetFlags(NonRegOp.getTargetFlags());
- NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug);
+ NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef,
+ IsDebug);
NonRegOp.setSubReg(SubReg);
return &MI;
@@ -2780,9 +2786,9 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
std::swap(Src0Idx, Src1Idx);
assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
- static_cast<int>(Src0Idx) &&
+ static_cast<int>(Src0Idx) &&
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
- static_cast<int>(Src1Idx) &&
+ static_cast<int>(Src1Idx) &&
"inconsistency with findCommutedOpIndices");
MachineOperand &Src0 = MI.getOperand(Src0Idx);
@@ -2792,8 +2798,8 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
if (Src0.isReg() && Src1.isReg()) {
if (isOperandLegal(MI, Src1Idx, &Src0)) {
// Be sure to copy the source modifiers to the right place.
- CommutedMI
- = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
+ CommutedMI =
+ TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
}
} else if (Src0.isReg() && !Src1.isReg()) {
@@ -2809,8 +2815,8 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
}
if (CommutedMI) {
- swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
- Src1, AMDGPU::OpName::src1_modifiers);
+ swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers, Src1,
+ AMDGPU::OpName::src1_modifiers);
CommutedMI->setDesc(get(CommutedOpcode));
}
@@ -2920,8 +2926,7 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
.addSym(OffsetHi, MO_FAR_BRANCH_OFFSET);
// Insert the indirect branch after the other terminator.
- BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
- .addReg(PCReg);
+ BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64)).addReg(PCReg);
// If a spill is needed for the pc register pair, we need to insert a spill
// restore block right before the destination block, and insert a short branch
@@ -3143,11 +3148,9 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL,
- int *BytesAdded) const {
+ const DebugLoc &DL, int *BytesAdded) const {
if (!FBB && Cond.empty()) {
- BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
- .addMBB(TBB);
+ BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)).addMBB(TBB);
if (BytesAdded)
*BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
return 1;
@@ -3155,13 +3158,11 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
assert(TBB && Cond[0].isImm());
- unsigned Opcode
- = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
+ unsigned Opcode =
+ getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
if (!FBB) {
- MachineInstr *CondBr =
- BuildMI(&MBB, DL, get(Opcode))
- .addMBB(TBB);
+ MachineInstr *CondBr = BuildMI(&MBB, DL, get(Opcode)).addMBB(TBB);
// Copy the flags onto the implicit condition register operand.
preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);
@@ -3174,12 +3175,9 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
assert(TBB && FBB);
- MachineInstr *CondBr =
- BuildMI(&MBB, DL, get(Opcode))
- .addMBB(TBB);
+ MachineInstr *CondBr = BuildMI(&MBB, DL, get(Opcode)).addMBB(TBB);
fixImplicitOperands(*CondBr);
- BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
- .addMBB(FBB);
+ BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)).addMBB(FBB);
MachineOperand &CondReg = CondBr->getOperand(1);
CondReg.setIsUndef(Cond[1].isUndef());
@@ -3192,7 +3190,7 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
}
bool SIInstrInfo::reverseBranchCondition(
- SmallVectorImpl<MachineOperand> &Cond) const {
+ SmallVectorImpl<MachineOperand> &Cond) const {
if (Cond.size() != 2) {
return true;
}
@@ -3248,9 +3246,10 @@ bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
}
void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, const DebugLoc &DL,
- Register DstReg, ArrayRef<MachineOperand> Cond,
- Register TrueReg, Register FalseReg) const {
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, Register DstReg,
+ ArrayRef<MachineOperand> Cond, Register TrueReg,
+ Register FalseReg) const {
BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
if (Pred == VCCZ || Pred == SCC_FALSE) {
Pred = static_cast<BranchPredicate>(-Pred);
@@ -3265,13 +3264,13 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineInstr *Select;
if (Pred == SCC_TRUE) {
Select = BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B32), DstReg)
- .addReg(TrueReg)
- .addReg(FalseReg);
+ .addReg(TrueReg)
+ .addReg(FalseReg);
} else {
// Instruction's operands are backwards from what is expected.
Select = BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e32), DstReg)
- .addReg(FalseReg)
- .addReg(TrueReg);
+ .addReg(FalseReg)
+ .addReg(TrueReg);
}
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
@@ -3280,26 +3279,25 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
if (DstSize == 64 && Pred == SCC_TRUE) {
MachineInstr *Select =
- BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
- .addReg(TrueReg)
- .addReg(FalseReg);
+ BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
+ .addReg(TrueReg)
+ .addReg(FalseReg);
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
return;
}
static const int16_t Sub0_15[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
- AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
- AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
};
static const int16_t Sub0_15_64[] = {
- AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
- AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
- AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
- AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, AMDGPU::sub4_sub5,
+ AMDGPU::sub6_sub7, AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
+ AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
};
unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
@@ -3321,8 +3319,8 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
}
}
- MachineInstrBuilder MIB = BuildMI(
- MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
I = MIB->getIterator();
@@ -3335,22 +3333,19 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineInstr *Select;
if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
- Select =
- BuildMI(MBB, I, DL, get(SelOp), DstElt)
- .addReg(FalseReg, 0, SubIdx)
- .addReg(TrueReg, 0, SubIdx);
+ Select = BuildMI(MBB, I, DL, get(SelOp), DstElt)
+ .addReg(FalseReg, 0, SubIdx)
+ .addReg(TrueReg, 0, SubIdx);
} else {
- Select =
- BuildMI(MBB, I, DL, get(SelOp), DstElt)
- .addReg(TrueReg, 0, SubIdx)
- .addReg(FalseReg, 0, SubIdx);
+ Select = BuildMI(MBB, I, DL, get(SelOp), DstElt)
+ .addReg(TrueReg, 0, SubIdx)
+ .addReg(FalseReg, 0, SubIdx);
}
preserveCondRegFlags(Select->getOperand(3), Cond[1]);
fixImplicitOperands(*Select);
- MIB.addReg(DstElt)
- .addImm(SubIdx);
+ MIB.addReg(DstElt).addImm(SubIdx);
}
}
@@ -3446,10 +3441,10 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
bool Is16Bit = OpSize == 2;
bool Is64Bit = OpSize == 8;
bool isVGPRCopy = RI.isVGPR(*MRI, DstReg);
- unsigned NewOpc = isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
- : AMDGPU::V_MOV_B32_e32
- : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
- : AMDGPU::S_MOV_B32;
+ unsigned NewOpc =
+ isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32
+ : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
+ : AMDGPU::S_MOV_B32;
APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)));
if (RI.isAGPR(*MRI, DstReg)) {
@@ -3708,8 +3703,8 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
return offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1);
}
-bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
- const MachineInstr &MIb) const {
+bool SIInstrInfo::areMemAccessesTriviallyDisjoint(
+ const MachineInstr &MIa, const MachineInstr &MIb) const {
assert(MIa.mayLoadOrStore() &&
"MIa must load from or modify a memory location");
assert(MIb.mayLoadOrStore() &&
@@ -3900,8 +3895,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
case AMDGPU::V_FMAC_F32_e32:
case AMDGPU::V_FMAC_LEGACY_F32_e32:
case AMDGPU::V_FMAC_F64_e32: {
- int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
- AMDGPU::OpName::src0);
+ int Src0Idx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
const MachineOperand *Src0 = &MI.getOperand(Src0Idx);
if (!Src0->isReg() && !Src0->isImm())
return nullptr;
@@ -3917,10 +3912,10 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
const MachineOperand *Src0Mods =
- getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
+ getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
const MachineOperand *Src1Mods =
- getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
+ getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
const MachineOperand *Src2Mods =
getNamedOperand(MI, AMDGPU::OpName::src2_modifiers);
@@ -4019,14 +4014,13 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
if (Src0Literal && !ST.hasVOP3Literal())
return nullptr;
- unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
- : IsF64 ? AMDGPU::V_FMA_F64_e64
- : IsLegacy
- ? AMDGPU::V_FMA_LEGACY_F32_e64
- : AMDGPU::V_FMA_F32_e64
- : IsF16 ? AMDGPU::V_MAD_F16_e64
- : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
- : AMDGPU::V_MAD_F32_e64;
+ unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
+ : IsF64 ? AMDGPU::V_FMA_F64_e64
+ : IsLegacy ? AMDGPU::V_FMA_LEGACY_F32_e64
+ : AMDGPU::V_FMA_F32_e64
+ : IsF16 ? AMDGPU::V_MAD_F16_e64
+ : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
+ : AMDGPU::V_MAD_F32_e64;
if (pseudoToMCOpcode(NewOpc) == -1)
return nullptr;
@@ -4090,8 +4084,7 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
- MI.getOpcode() == AMDGPU::S_SETPRIO ||
- changesVGPRIndexingMode(MI);
+ MI.getOpcode() == AMDGPU::S_SETPRIO || changesVGPRIndexingMode(MI);
}
bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
@@ -4105,7 +4098,8 @@ bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) {
return is_contained(MI.getDesc().implicit_defs(), AMDGPU::MODE);
}
-bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
+bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(
+ const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
if (MI.mayStore() && isSMRD(MI))
@@ -4248,8 +4242,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
case AMDGPU::OPERAND_REG_INLINE_C_INT64:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
- return AMDGPU::isInlinableLiteral64(MO.getImm(),
- ST.hasInv2PiInlineImm());
+ return AMDGPU::isInlinableLiteral64(MO.getImm(), ST.hasInv2PiInlineImm());
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
@@ -4356,8 +4349,8 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
if (MO.isImm() && isInlineConstant(MO, OpInfo)) {
if (isMAI(MI) && ST.hasMFMAInlineLiteralBug() &&
- OpNo ==(unsigned)AMDGPU::getNamedOperandIdx(MI.getOpcode(),
- AMDGPU::OpName::src2))
+ OpNo == (unsigned)AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::src2))
return false;
return RI.opCanUseInlineConstant(OpInfo.OperandType);
}
@@ -4407,33 +4400,33 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
// Can't shrink instruction with three operands.
if (Src2) {
switch (MI.getOpcode()) {
- default: return false;
-
- case AMDGPU::V_ADDC_U32_e64:
- case AMDGPU::V_SUBB_U32_e64:
- case AMDGPU::V_SUBBREV_U32_e64: {
- const MachineOperand *Src1
- = getNamedOperand(MI, AMDGPU::OpName::src1);
- if (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()))
- return false;
- // Additional verification is needed for sdst/src2.
- return true;
- }
- case AMDGPU::V_MAC_F16_e64:
- case AMDGPU::V_MAC_F32_e64:
- case AMDGPU::V_MAC_LEGACY_F32_e64:
- case AMDGPU::V_FMAC_F16_e64:
- case AMDGPU::V_FMAC_F16_t16_e64:
- case AMDGPU::V_FMAC_F32_e64:
- case AMDGPU::V_FMAC_F64_e64:
- case AMDGPU::V_FMAC_LEGACY_F32_e64:
- if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) ||
- hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
- return false;
- break;
+ default:
+ return false;
- case AMDGPU::V_CNDMASK_B32_e64:
- break;
+ case AMDGPU::V_ADDC_U32_e64:
+ case AMDGPU::V_SUBB_U32_e64:
+ case AMDGPU::V_SUBBREV_U32_e64: {
+ const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()))
+ return false;
+ // Additional verification is needed for sdst/src2.
+ return true;
+ }
+ case AMDGPU::V_MAC_F16_e64:
+ case AMDGPU::V_MAC_F32_e64:
+ case AMDGPU::V_MAC_LEGACY_F32_e64:
+ case AMDGPU::V_FMAC_F16_e64:
+ case AMDGPU::V_FMAC_F16_t16_e64:
+ case AMDGPU::V_FMAC_F32_e64:
+ case AMDGPU::V_FMAC_F64_e64:
+ case AMDGPU::V_FMAC_LEGACY_F32_e64:
+ if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) ||
+ hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
+ return false;
+ break;
+
+ case AMDGPU::V_CNDMASK_B32_e64:
+ break;
}
}
@@ -4478,8 +4471,7 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
const MCInstrDesc &Op32Desc = get(Op32);
MachineInstrBuilder Inst32 =
- BuildMI(*MBB, MI, MI.getDebugLoc(), Op32Desc)
- .setMIFlags(MI.getFlags());
+ BuildMI(*MBB, MI, MI.getDebugLoc(), Op32Desc).setMIFlags(MI.getFlags());
// Add the dst operand if the 32-bit encoding also has an explicit $vdst.
// For VOPC instructions, this is replaced by an implicit def of vcc.
@@ -4579,10 +4571,8 @@ static bool shouldReadExec(const MachineInstr &MI) {
return true;
}
- if (MI.isPreISelOpcode() ||
- SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
- SIInstrInfo::isSALU(MI) ||
- SIInstrInfo::isSMRD(MI))
+ if (MI.isPreISelOpcode() || SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
+ SIInstrInfo::isSALU(MI) || SIInstrInfo::isSMRD(MI))
return false;
return true;
@@ -4623,7 +4613,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
const MachineRegisterInfo &MRI = MF->getRegInfo();
// FIXME: At this point the COPY verify is done only for non-ssa forms.
- // Find a better property to recognize the point where instruction selection is just done.
+ // Find a better property to recognize the point where instruction selection
+ // is just done.
// We can only enforce this check after SIFixSGPRCopies pass.
if (!MRI.isSSA() && MI.isCopy())
return verifyCopy(MI, MRI, ErrInfo);
@@ -4794,7 +4785,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (!ST.hasSDWAScalar()) {
// Only VGPRS on VI
- if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
+ if (!MO.isReg() ||
+ !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
return false;
}
@@ -4802,7 +4794,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
// No immediates on GFX9
if (!MO.isReg()) {
ErrInfo =
- "Only reg allowed as operands in SDWA instructions on GFX9+";
+ "Only reg allowed as operands in SDWA instructions on GFX9+";
return false;
}
}
@@ -4811,8 +4803,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (!ST.hasSDWAOmod()) {
// No omod allowed on VI
const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
- if (OMod != nullptr &&
- (!OMod->isImm() || OMod->getImm() != 0)) {
+ if (OMod != nullptr && (!OMod->isImm() || OMod->getImm() != 0)) {
ErrInfo = "OMod not allowed in SDWA instructions on VI";
return false;
}
@@ -4843,7 +4834,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
} else if (!ST.hasSDWAOutModsVOPC()) {
// No clamp allowed on GFX9 for VOPC
- const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
+ const MachineOperand *Clamp =
+ getNamedOperand(MI, AMDGPU::OpName::clamp);
if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
return false;
@@ -4858,7 +4850,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
- const MachineOperand *DstUnused = getNamedOperand(MI, AMDGPU::OpName::dst_unused);
+ const MachineOperand *DstUnused =
+ getNamedOperand(MI, AMDGPU::OpName::dst_unused);
if (DstUnused && DstUnused->isImm() &&
DstUnused->getImm() == AMDGPU::SDWA::UNUSED_PRESERVE) {
const MachineOperand &Dst = MI.getOperand(DstIdx);
@@ -5019,8 +5012,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
const MachineOperand &Src1 = MI.getOperand(Src1Idx);
const MachineOperand &Src2 = MI.getOperand(Src2Idx);
if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
- if (!compareMachineOp(Src0, Src1) &&
- !compareMachineOp(Src0, Src2)) {
+ if (!compareMachineOp(Src0, Src1) && !compareMachineOp(Src0, Src2)) {
ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
return false;
}
@@ -5107,8 +5099,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
const MachineOperand &Src0 = MI.getOperand(Src0Idx);
- const MachineOperand &ImpUse
- = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
+ const MachineOperand &ImpUse =
+ MI.getOperand(StaticNumOps + NumImplicitOps - 1);
if (!ImpUse.isReg() || !ImpUse.isUse() ||
!isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
ErrInfo = "src0 should be subreg of implicit vector use";
@@ -5157,8 +5149,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (isImage(MI)) {
const MachineOperand *DimOp = getNamedOperand(MI, AMDGPU::OpName::dim);
if (DimOp) {
- int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
- AMDGPU::OpName::vaddr0);
+ int VAddr0Idx =
+ AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vaddr0);
int RSrcOpName =
isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
@@ -5175,7 +5167,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
bool IsA16 = false;
if (ST.hasR128A16()) {
- const MachineOperand *R128A16 = getNamedOperand(MI, AMDGPU::OpName::r128);
+ const MachineOperand *R128A16 =
+ getNamedOperand(MI, AMDGPU::OpName::r128);
IsA16 = R128A16->getImm() != 0;
} else if (ST.hasA16()) {
const MachineOperand *A16 = getNamedOperand(MI, AMDGPU::OpName::a16);
@@ -5240,8 +5233,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
ST.getGeneration() < AMDGPUSubtarget::GFX10) {
if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
- DC <= DppCtrl::ROW_NEWBCAST_LAST &&
- !ST.hasGFX90AInsts()) {
+ DC <= DppCtrl::ROW_NEWBCAST_LAST && !ST.hasGFX90AInsts()) {
ErrInfo = "Invalid dpp_ctrl value: "
"row_newbroadcast/row_share is not supported before "
"GFX90A/GFX10";
@@ -5264,8 +5256,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if ((MI.mayStore() || MI.mayLoad()) && !isVGPRSpill(MI)) {
const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
- uint16_t DataNameIdx = isDS(Opcode) ? AMDGPU::OpName::data0
- : AMDGPU::OpName::vdata;
+ uint16_t DataNameIdx =
+ isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
const MachineOperand *Data = getNamedOperand(MI, DataNameIdx);
const MachineOperand *Data2 = getNamedOperand(MI, AMDGPU::OpName::data1);
if (Data && !Data->isReg())
@@ -5594,10 +5586,10 @@ adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI,
return RI.getProperlyAlignedRC(RI.getRegClass(RCID));
}
-const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID,
- unsigned OpNum, const TargetRegisterInfo *TRI,
- const MachineFunction &MF)
- const {
+const TargetRegisterClass *
+SIInstrInfo::getRegClass(const MCInstrDesc &TID, unsigned OpNum,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) const {
if (OpNum >= TID.getNumOperands())
return nullptr;
auto RegClass = TID.operands()[OpNum].RegClass;
@@ -5610,11 +5602,11 @@ const TargetRegisterClass *SIInstrInfo::getRegClass(const MCInstrDesc &TID,
//
// The check is limited to FLAT and DS because atomics in non-flat encoding
// have their vdst and vdata tied to be the same register.
- const int VDstIdx = AMDGPU::getNamedOperandIdx(TID.Opcode,
- AMDGPU::OpName::vdst);
- const int DataIdx = AMDGPU::getNamedOperandIdx(TID.Opcode,
- (TID.TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
- : AMDGPU::OpName::vdata);
+ const int VDstIdx =
+ AMDGPU::getNamedOperandIdx(TID.Opcode, AMDGPU::OpName::vdst);
+ const int DataIdx = AMDGPU::getNamedOperandIdx(
+ TID.Opcode, (TID.TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
+ : AMDGPU::OpName::vdata);
if (DataIdx != -1) {
IsAllocatable = VDstIdx != -1 || AMDGPU::hasNamedOperand(
TID.Opcode, AMDGPU::OpName::data1);
@@ -5691,8 +5683,7 @@ MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
llvm_unreachable("Unhandled register index for immediate");
}
- unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC,
- SubIdx, SubRC);
+ unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC, SubIdx, SubRC);
return MachineOperand::CreateReg(SubReg, false);
}
@@ -5799,14 +5790,13 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
if (IsAGPR && !ST.hasMAIInsts())
return false;
unsigned Opc = MI.getOpcode();
- if (IsAGPR &&
- (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
+ if (IsAGPR && (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
(MI.mayLoad() || MI.mayStore() || isDS(Opc) || isMIMG(Opc)))
return false;
// Atomics should have both vdst and vdata either vgpr or agpr.
const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
- const int DataIdx = AMDGPU::getNamedOperandIdx(Opc,
- isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
+ const int DataIdx = AMDGPU::getNamedOperandIdx(
+ Opc, isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
MI.getOperand(DataIdx).isReg() &&
RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)
@@ -5816,8 +5806,8 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)
return false;
// DS instructions with 2 src operands also must have tied RC.
- const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc,
- AMDGPU::OpName::data1);
+ const int Data1Idx =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)
return false;
@@ -5983,11 +5973,9 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
- int VOP3Idx[3] = {
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
- AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
- };
+ int VOP3Idx[3] = {AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)};
if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
Opc == AMDGPU::V_PERMLANEX16_B32_e64) {
@@ -5998,13 +5986,13 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
- .add(Src1);
+ .add(Src1);
Src1.ChangeToRegister(Reg, false);
}
if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
- .add(Src2);
+ .add(Src2);
Src2.ChangeToRegister(Reg, false);
}
}
@@ -6186,13 +6174,14 @@ bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
assert(OldSAddrIdx == NewVAddrIdx);
if (OldVAddrIdx >= 0) {
- int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
- AMDGPU::OpName::vdst_in);
+ int NewVDstIn =
+ AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
// removeOperand doesn't try to fixup tied operand indexes at it goes, so
// it asserts. Untie the operands for now and retie them afterwards.
if (NewVDstIn != -1) {
- int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
+ int OldVDstIn =
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
Inst.untieRegOperand(OldVDstIn);
}
@@ -6239,8 +6228,8 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
Register OpReg = Op.getReg();
unsigned OpSubReg = Op.getSubReg();
- const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
- RI.getRegClassForReg(MRI, OpReg), OpSubReg);
+ const TargetRegisterClass *OpRC =
+ RI.getSubClassWithSubReg(RI.getRegClassForReg(MRI, OpReg), OpSubReg);
// Check if operand is already the correct register class.
if (DstRC == OpRC)
@@ -6287,8 +6276,7 @@ static void emitLoadScalarOpsFromVGPRLoop(
ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
unsigned XorTermOpc =
ST.isWave32() ? AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
- unsigned AndOpc =
- ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
+ unsigned AndOpc = ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
MachineBasicBlock::iterator I = LoopBB.begin();
@@ -6840,18 +6828,18 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
// NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0
const DebugLoc &DL = MI.getDebugLoc();
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_CO_U32_e64), NewVAddrLo)
- .addDef(CondReg0)
- .addReg(RsrcPtr, 0, AMDGPU::sub0)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
- .addImm(0);
+ .addDef(CondReg0)
+ .addReg(RsrcPtr, 0, AMDGPU::sub0)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
+ .addImm(0);
// NewVaddrHi = RsrcPtr:sub1 + VAddr:sub1
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e64), NewVAddrHi)
- .addDef(CondReg1, RegState::Dead)
- .addReg(RsrcPtr, 0, AMDGPU::sub1)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
- .addReg(CondReg0, RegState::Kill)
- .addImm(0);
+ .addDef(CondReg1, RegState::Dead)
+ .addReg(RsrcPtr, 0, AMDGPU::sub1)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
+ .addReg(CondReg0, RegState::Kill)
+ .addImm(0);
// NewVaddr = {NewVaddrHi, NewVaddrLo}
BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
@@ -7312,9 +7300,9 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
auto NewInstr =
BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode), CondReg)
- .setMIFlags(Inst.getFlags());
- if (AMDGPU::getNamedOperandIdx(NewOpcode,
- AMDGPU::OpName::src0_modifiers) >= 0) {
+ .setMIFlags(Inst.getFlags());
+ if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
+ 0) {
NewInstr
.addImm(0) // src0_modifiers
.add(Inst.getOperand(0)) // src0
@@ -7322,9 +7310,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
.add(Inst.getOperand(1)) // src1
.addImm(0); // clamp
} else {
- NewInstr
- .add(Inst.getOperand(0))
- .add(Inst.getOperand(1));
+ NewInstr.add(Inst.getOperand(0)).add(Inst.getOperand(1));
}
legalizeOperands(*NewInstr, MDT);
int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC, /*TRI=*/nullptr);
@@ -7430,8 +7416,8 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
if (isVOP3(NewOpcode) && !isVOP3(Opcode)) {
// Intersperse VOP3 modifiers among the SALU operands.
NewInstr->addOperand(Inst.getOperand(0));
- if (AMDGPU::getNamedOperandIdx(NewOpcode,
- AMDGPU::OpName::src0_modifiers) >= 0)
+ if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
+ 0)
NewInstr.addImm(0);
if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::src0)) {
MachineOperand Src = Inst.getOperand(1);
@@ -7536,8 +7522,8 @@ SIInstrInfo::moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
unsigned Opc = Inst.getOpcode();
assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
- unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
- AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
+ unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ? AMDGPU::V_ADD_U32_e64
+ : AMDGPU::V_SUB_U32_e64;
assert(Inst.getOperand(3).getReg() == AMDGPU::SCC);
Inst.removeOperand(3);
@@ -7650,16 +7636,14 @@ void SIInstrInfo::lowerScalarAbs(SIInstrWorklist &Worklist,
Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned SubOp = ST.hasAddNoCarry() ?
- AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
+ unsigned SubOp =
+ ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e32 : AMDGPU::V_SUB_CO_U32_e32;
- BuildMI(MBB, MII, DL, get(SubOp), TmpReg)
- .addImm(0)
- .addReg(Src.getReg());
+ BuildMI(MBB, MII, DL, get(SubOp), TmpReg).addImm(0).addReg(Src.getReg());
BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
- .addReg(Src.getReg())
- .addReg(TmpReg);
+ .addReg(Src.getReg())
+ .addReg(TmpReg);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
@@ -7682,8 +7666,8 @@ void SIInstrInfo::lowerScalarXnor(SIInstrWorklist &Worklist,
legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
BuildMI(MBB, MII, DL, get(AMDGPU::V_XNOR_B32_e64), NewDest)
- .add(Src0)
- .add(Src1);
+ .add(Src0)
+ .add(Src1);
MRI.replaceRegWith(Dest.getReg(), NewDest);
addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
@@ -7692,10 +7676,10 @@ void SIInstrInfo::lowerScalarXnor(SIInstrWorklist &Worklist,
// invert either source and then perform the XOR. If either source is a
// scalar register, then we can leave the inversion on the scalar unit to
// achieve a better distribution of scalar and vector instructions.
- bool Src0IsSGPR = Src0.isReg() &&
- RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
- bool Src1IsSGPR = Src1.isReg() &&
- RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
+ bool Src0IsSGPR =
+ Src0.isReg() && RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
+ bool Src1IsSGPR =
+ Src1.isReg() && RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
MachineInstr *Xor;
Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
@@ -7706,17 +7690,17 @@ void SIInstrInfo::lowerScalarXnor(SIInstrWorklist &Worklist,
if (Src0IsSGPR) {
BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp).add(Src0);
Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
- .addReg(Temp)
- .add(Src1);
+ .addReg(Temp)
+ .add(Src1);
} else if (Src1IsSGPR) {
BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Temp).add(Src1);
Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), NewDest)
- .add(Src0)
- .addReg(Temp);
+ .add(Src0)
+ .addReg(Temp);
} else {
Xor = BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B32), Temp)
- .add(Src0)
- .add(Src1);
+ .add(Src0)
+ .add(Src1);
MachineInstr *Not =
BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest).addReg(Temp);
Worklist.insert(Not);
@@ -7745,12 +7729,11 @@ void SIInstrInfo::splitScalarNotBinop(SIInstrWorklist &Worklist,
Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), Interm)
- .add(Src0)
- .add(Src1);
+ MachineInstr &Op =
+ *BuildMI(MBB, MII, DL, get(Opcode), Interm).add(Src0).add(Src1);
- MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest)
- .addReg(Interm);
+ MachineInstr &Not =
+ *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), NewDest).addReg(Interm);
Worklist.insert(&Op);
Worklist.insert(&Not);
@@ -7774,12 +7757,11 @@ void SIInstrInfo::splitScalarBinOpN2(SIInstrWorklist &Worklist,
Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- MachineInstr &Not = *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm)
- .add(Src1);
+ MachineInstr &Not =
+ *BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B32), Interm).add(Src1);
- MachineInstr &Op = *BuildMI(MBB, MII, DL, get(Opcode), NewDest)
- .add(Src0)
- .addReg(Interm);
+ MachineInstr &Op =
+ *BuildMI(MBB, MII, DL, get(Opcode), NewDest).add(Src0).addReg(Interm);
Worklist.insert(&Not);
Worklist.insert(&Op);
@@ -7801,15 +7783,14 @@ void SIInstrInfo::splitScalar64BitUnaryOp(SIInstrWorklist &Worklist,
MachineBasicBlock::iterator MII = Inst;
const MCInstrDesc &InstDesc = get(Opcode);
- const TargetRegisterClass *Src0RC = Src0.isReg() ?
- MRI.getRegClass(Src0.getReg()) :
- &AMDGPU::SGPR_32RegClass;
+ const TargetRegisterClass *Src0RC =
+ Src0.isReg() ? MRI.getRegClass(Src0.getReg()) : &AMDGPU::SGPR_32RegClass;
const TargetRegisterClass *Src0SubRC =
RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
- MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub0, Src0SubRC);
+ MachineOperand SrcReg0Sub0 =
+ buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
@@ -7817,23 +7798,25 @@ void SIInstrInfo::splitScalar64BitUnaryOp(SIInstrWorklist &Worklist,
RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
- MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
+ MachineInstr &LoHalf =
+ *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
- MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub1, Src0SubRC);
+ MachineOperand SrcReg0Sub1 =
+ buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
- MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
+ MachineInstr &HiHalf =
+ *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
if (Swap)
std::swap(DestSub0, DestSub1);
Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
- .addReg(DestSub0)
- .addImm(AMDGPU::sub0)
- .addReg(DestSub1)
- .addImm(AMDGPU::sub1);
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
MRI.replaceRegWith(Dest.getReg(), FullDestReg);
@@ -8035,27 +8018,25 @@ void SIInstrInfo::splitScalar64BitBinaryOp(SIInstrWorklist &Worklist,
MachineBasicBlock::iterator MII = Inst;
const MCInstrDesc &InstDesc = get(Opcode);
- const TargetRegisterClass *Src0RC = Src0.isReg() ?
- MRI.getRegClass(Src0.getReg()) :
- &AMDGPU::SGPR_32RegClass;
+ const TargetRegisterClass *Src0RC =
+ Src0.isReg() ? MRI.getRegClass(Src0.getReg()) : &AMDGPU::SGPR_32RegClass;
const TargetRegisterClass *Src0SubRC =
RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
- const TargetRegisterClass *Src1RC = Src1.isReg() ?
- MRI.getRegClass(Src1.getReg()) :
- &AMDGPU::SGPR_32RegClass;
+ const TargetRegisterClass *Src1RC =
+ Src1.isReg() ? MRI.getRegClass(Src1.getReg()) : &AMDGPU::SGPR_32RegClass;
const TargetRegisterClass *Src1SubRC =
RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
- MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub0, Src0SubRC);
- MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
- AMDGPU::sub0, Src1SubRC);
- MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
- AMDGPU::sub1, Src0SubRC);
- MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
- AMDGPU::sub1, Src1SubRC);
+ MachineOperand SrcReg0Sub0 =
+ buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
+ MachineOperand SrcReg1Sub0 =
+ buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
+ MachineOperand SrcReg0Sub1 =
+ buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
+ MachineOperand SrcReg1Sub1 =
+ buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
@@ -8074,10 +8055,10 @@ void SIInstrInfo::splitScalar64BitBinaryOp(SIInstrWorklist &Worklist,
Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
- .addReg(DestSub0)
- .addImm(AMDGPU::sub0)
- .addReg(DestSub1)
- .addImm(AMDGPU::sub1);
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
MRI.replaceRegWith(Dest.getReg(), FullDestReg);
@@ -8105,8 +8086,8 @@ void SIInstrInfo::splitScalar64BitXnor(SIInstrWorklist &Worklist,
Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- MachineOperand* Op0;
- MachineOperand* Op1;
+ MachineOperand *Op0;
+ MachineOperand *Op1;
if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) {
Op0 = &Src0;
@@ -8116,14 +8097,13 @@ void SIInstrInfo::splitScalar64BitXnor(SIInstrWorklist &Worklist,
Op1 = &Src0;
}
- BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm)
- .add(*Op0);
+ BuildMI(MBB, MII, DL, get(AMDGPU::S_NOT_B64), Interm).add(*Op0);
Register NewDest = MRI.createVirtualRegister(DestRC);
MachineInstr &Xor = *BuildMI(MBB, MII, DL, get(AMDGPU::S_XOR_B64), NewDest)
- .addReg(Interm)
- .add(*Op1);
+ .addReg(Interm)
+ .add(*Op1);
MRI.replaceRegWith(Dest.getReg(), NewDest);
@@ -8142,9 +8122,8 @@ void SIInstrInfo::splitScalar64BitBCNT(SIInstrWorklist &Worklist,
MachineOperand &Src = Inst.getOperand(1);
const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
- const TargetRegisterClass *SrcRC = Src.isReg() ?
- MRI.getRegClass(Src.getReg()) :
- &AMDGPU::SGPR_32RegClass;
+ const TargetRegisterClass *SrcRC =
+ Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -8152,10 +8131,10 @@ void SIInstrInfo::splitScalar64BitBCNT(SIInstrWorklist &Worklist,
const TargetRegisterClass *SrcSubRC =
RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
- MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
- AMDGPU::sub0, SrcSubRC);
- MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
- AMDGPU::sub1, SrcSubRC);
+ MachineOperand SrcRegSub0 =
+ buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub0, SrcSubRC);
+ MachineOperand SrcRegSub1 =
+ buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub1, SrcSubRC);
BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0);
@@ -8177,10 +8156,10 @@ void SIInstrInfo::splitScalar64BitBFE(SIInstrWorklist &Worklist,
MachineOperand &Dest = Inst.getOperand(0);
uint32_t Imm = Inst.getOperand(2).getImm();
- uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
+ uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
- (void) Offset;
+ (void)Offset;
// Only sext_inreg cases handled.
assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 &&
@@ -8197,14 +8176,14 @@ void SIInstrInfo::splitScalar64BitBFE(SIInstrWorklist &Worklist,
.addImm(BitWidth);
BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi)
- .addImm(31)
- .addReg(MidRegLo);
+ .addImm(31)
+ .addReg(MidRegLo);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
- .addReg(MidRegLo)
- .addImm(AMDGPU::sub0)
- .addReg(MidRegHi)
- .addImm(AMDGPU::sub1);
+ .addReg(MidRegLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(MidRegHi)
+ .addImm(AMDGPU::sub1);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
@@ -8216,14 +8195,14 @@ void SIInstrInfo::splitScalar64BitBFE(SIInstrWorklist &Worklist,
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg)
- .addImm(31)
- .addReg(Src.getReg(), 0, AMDGPU::sub0);
+ .addImm(31)
+ .addReg(Src.getReg(), 0, AMDGPU::sub0);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg)
- .addReg(Src.getReg(), 0, AMDGPU::sub0)
- .addImm(AMDGPU::sub0)
- .addReg(TmpReg)
- .addImm(AMDGPU::sub1);
+ .addReg(Src.getReg(), 0, AMDGPU::sub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(TmpReg)
+ .addImm(AMDGPU::sub1);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
@@ -8288,7 +8267,8 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
Register DstReg, MachineRegisterInfo &MRI,
SIInstrWorklist &Worklist) const {
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
- E = MRI.use_end(); I != E;) {
+ E = MRI.use_end();
+ I != E;) {
MachineInstr &UseMI = *I->getParent();
unsigned OpNo = 0;
@@ -8336,27 +8316,25 @@ void SIInstrInfo::movePackToVALU(SIInstrWorklist &Worklist,
// FIXME: Can do a lot better if we know the high bits of src0 or src1 are
// 0.
- BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
- .addImm(0xffff);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg).addImm(0xffff);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
- .addReg(ImmReg, RegState::Kill)
- .add(Src0);
+ .addReg(ImmReg, RegState::Kill)
+ .add(Src0);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32_e64), ResultReg)
- .add(Src1)
- .addImm(16)
- .addReg(TmpReg, RegState::Kill);
+ .add(Src1)
+ .addImm(16)
+ .addReg(TmpReg, RegState::Kill);
break;
}
case AMDGPU::S_PACK_LH_B32_B16: {
Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
- .addImm(0xffff);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg).addImm(0xffff);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32_e64), ResultReg)
- .addReg(ImmReg, RegState::Kill)
- .add(Src0)
- .add(Src1);
+ .addReg(ImmReg, RegState::Kill)
+ .add(Src0)
+ .add(Src1);
break;
}
case AMDGPU::S_PACK_HL_B32_B16: {
@@ -8374,14 +8352,14 @@ void SIInstrInfo::movePackToVALU(SIInstrWorklist &Worklist,
Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
- .addImm(16)
- .add(Src0);
+ .addImm(16)
+ .add(Src0);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
- .addImm(0xffff0000);
+ .addImm(0xffff0000);
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32_e64), ResultReg)
- .add(Src1)
- .addReg(ImmReg, RegState::Kill)
- .addReg(TmpReg, RegState::Kill);
+ .add(Src1)
+ .addReg(ImmReg, RegState::Kill)
+ .addReg(TmpReg, RegState::Kill);
break;
}
default:
@@ -8455,8 +8433,8 @@ void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
}
}
-const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
- const MachineInstr &Inst) const {
+const TargetRegisterClass *
+SIInstrInfo::getDestEquivalentVGPRClass(const MachineInstr &Inst) const {
const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
switch (Inst.getOpcode()) {
@@ -8589,9 +8567,8 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
int64_t Format = ST.getGeneration() >= AMDGPUSubtarget::GFX11
? (int64_t)AMDGPU::UfmtGFX11::UFMT_32_FLOAT
: (int64_t)AMDGPU::UfmtGFX10::UFMT_32_FLOAT;
- return (Format << 44) |
- (1ULL << 56) | // RESOURCE_LEVEL = 1
- (3ULL << 60); // OOB_SELECT = 3
+ return (Format << 44) | (1ULL << 56) | // RESOURCE_LEVEL = 1
+ (3ULL << 60); // OOB_SELECT = 3
}
uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
@@ -8610,8 +8587,7 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
}
uint64_t SIInstrInfo::getScratchRsrcWords23() const {
- uint64_t Rsrc23 = getDefaultRsrcDataFormat() |
- AMDGPU::RSRC_TID_ENABLE |
+ uint64_t Rsrc23 = getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size;
// GFX9 doesn't have ELEMENT_SIZE.
@@ -8650,8 +8626,8 @@ unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
if (!Addr || !Addr->isFI())
return Register();
- assert(!MI.memoperands_empty() &&
- (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
+ assert(!MI.memoperands_empty() && (*MI.memoperands_begin())->getAddrSpace() ==
+ AMDGPUAS::PRIVATE_ADDRESS);
FrameIndex = Addr->getIndex();
return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
@@ -8795,14 +8771,14 @@ SIInstrInfo::getSerializableTargetIndices() const {
/// post-RA version of misched uses CreateTargetMIHazardRecognizer.
ScheduleHazardRecognizer *
SIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
- const ScheduleDAG *DAG) const {
+ const ScheduleDAG *DAG) const {
return new GCNHazardRecognizer(DAG->MF);
}
/// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer
/// pass.
-ScheduleHazardRecognizer *
-SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
+ScheduleHazardRecognizer *SIInstrInfo::CreateTargetPostRAHazardRecognizer(
+ const MachineFunction &MF) const {
return new GCNHazardRecognizer(MF);
}
@@ -8828,13 +8804,13 @@ SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
ArrayRef<std::pair<unsigned, const char *>>
SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
static const std::pair<unsigned, const char *> TargetFlags[] = {
- { MO_GOTPCREL, "amdgpu-gotprel" },
- { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" },
- { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" },
- { MO_REL32_LO, "amdgpu-rel32-lo" },
- { MO_REL32_HI, "amdgpu-rel32-hi" },
- { MO_ABS32_LO, "amdgpu-abs32-lo" },
- { MO_ABS32_HI, "amdgpu-abs32-hi" },
+ {MO_GOTPCREL, "amdgpu-gotprel"},
+ {MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo"},
+ {MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi"},
+ {MO_REL32_LO, "amdgpu-rel32-lo"},
+ {MO_REL32_HI, "amdgpu-rel32-hi"},
+ {MO_ABS32_LO, "amdgpu-abs32-lo"},
+ {MO_ABS32_HI, "amdgpu-abs32-hi"},
};
return ArrayRef(TargetFlags);
@@ -8883,11 +8859,10 @@ bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI,
MI.modifiesRegister(AMDGPU::EXEC, &RI)));
}
-MachineInstrBuilder
-SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- const DebugLoc &DL,
- Register DestReg) const {
+MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL,
+ Register DestReg) const {
if (ST.hasAddNoCarry())
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_U32_e64), DestReg);
@@ -8896,7 +8871,7 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_CO_U32_e64), DestReg)
- .addReg(UnusedCarry, RegState::Define | RegState::Dead);
+ .addReg(UnusedCarry, RegState::Define | RegState::Dead);
}
MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
@@ -8919,7 +8894,7 @@ MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
return MachineInstrBuilder();
return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_CO_U32_e64), DestReg)
- .addReg(UnusedCarry, RegState::Define | RegState::Dead);
+ .addReg(UnusedCarry, RegState::Define | RegState::Dead);
}
bool SIInstrInfo::isKillTerminator(unsigned Opcode) {
@@ -8932,7 +8907,8 @@ bool SIInstrInfo::isKillTerminator(unsigned Opcode) {
}
}
-const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) const {
+const MCInstrDesc &
+SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) const {
switch (Opcode) {
case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
@@ -9147,7 +9123,7 @@ static unsigned subtargetEncodingFamily(const GCNSubtarget &ST) {
}
bool SIInstrInfo::isAsmOnlyOpcode(int MCOp) const {
- switch(MCOp) {
+ switch (MCOp) {
// These opcodes use indirect register addressing so
// they need special handling by codegen (currently missing).
// Therefore it is too risky to allow these opcodes
@@ -9172,7 +9148,7 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
unsigned Gen = subtargetEncodingFamily(ST);
if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
- ST.getGeneration() == AMDGPUSubtarget::GFX9)
+ ST.getGeneration() == AMDGPUSubtarget::GFX9)
Gen = SIEncodingFamily::GFX9;
// Adjust the encoding family to GFX80 for D16 buffer instructions when the
@@ -9230,17 +9206,17 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
return MCOp;
}
-static
-TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd) {
+static TargetInstrInfo::RegSubRegPair
+getRegOrUndef(const MachineOperand &RegOpnd) {
assert(RegOpnd.isReg());
- return RegOpnd.isUndef() ? TargetInstrInfo::RegSubRegPair() :
- getRegSubRegPair(RegOpnd);
+ return RegOpnd.isUndef() ? TargetInstrInfo::RegSubRegPair()
+ : getRegSubRegPair(RegOpnd);
}
-TargetInstrInfo::RegSubRegPair
-llvm::getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg) {
+TargetInstrInfo::RegSubRegPair llvm::getRegSequenceSubReg(MachineInstr &MI,
+ unsigned SubReg) {
assert(MI.isRegSequence());
- for (unsigned I = 0, E = (MI.getNumOperands() - 1)/ 2; I < E; ++I)
+ for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
auto &RegOp = MI.getOperand(1 + 2 * I);
return getRegOrUndef(RegOp);
@@ -9255,7 +9231,8 @@ static bool followSubRegDef(MachineInstr &MI,
if (!RSR.SubReg)
return false;
switch (MI.getOpcode()) {
- default: break;
+ default:
+ break;
case AMDGPU::REG_SEQUENCE:
RSR = getRegSequenceSubReg(MI, RSR.SubReg);
return true;
@@ -9311,8 +9288,7 @@ MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
}
bool llvm::execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
- Register VReg,
- const MachineInstr &DefMI,
+ Register VReg, const MachineInstr &DefMI,
const MachineInstr &UseMI) {
assert(MRI.isSSA() && "Must be run on SSA");
@@ -9372,7 +9348,7 @@ bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
int NumInst = 0;
// Stop scan when we have seen all the uses.
- for (auto I = std::next(DefMI.getIterator()); ; ++I) {
+ for (auto I = std::next(DefMI.getIterator());; ++I) {
assert(I != DefBB->end());
if (I->isDebugInstr())
@@ -9809,12 +9785,12 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
: AMDGPU::S_BITCMP1_B32
- : IsReversedCC ? AMDGPU::S_BITCMP0_B64
- : AMDGPU::S_BITCMP1_B64;
+ : IsReversedCC ? AMDGPU::S_BITCMP0_B64
+ : AMDGPU::S_BITCMP1_B64;
BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc))
- .add(*SrcOp)
- .addImm(BitNo);
+ .add(*SrcOp)
+ .addImm(BitNo);
Def->eraseFromParent();
return true;
@@ -9872,8 +9848,8 @@ void SIInstrInfo::enforceOperandRCAlignment(MachineInstr &MI,
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
Register DataReg = Op.getReg();
bool IsAGPR = RI.isAGPR(MRI, DataReg);
- Register Undef = MRI.createVirtualRegister(
- IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
+ Register Undef = MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AGPR_32RegClass
+ : &AMDGPU::VGPR_32RegClass);
BuildMI(*BB, MI, DL, get(AMDGPU::IMPLICIT_DEF), Undef);
Register NewVR =
MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
diff --git a/llvm/test/MachineVerifier/fix-illegal-copies.mir b/llvm/test/MachineVerifier/AMDGPU/fix-illegal-vector-copies.mir
similarity index 97%
rename from llvm/test/MachineVerifier/fix-illegal-copies.mir
rename to llvm/test/MachineVerifier/AMDGPU/fix-illegal-vector-copies.mir
index c3acebf7a694eb..50567ac9a8454b 100644
--- a/llvm/test/MachineVerifier/fix-illegal-copies.mir
+++ b/llvm/test/MachineVerifier/AMDGPU/fix-illegal-vector-copies.mir
@@ -1,8 +1,7 @@
# RUN: not --crash llc -march=amdgcn -mcpu=gfx1200 -run-pass=none -o /dev/null %s 2>&1 | FileCheck %s
-# REQUIRES: amdgpu-registered-target
---
-name: fix-illegal-copies
+name: fix-illegal-vector-copies
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
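
For readers skimming the patch, here is a minimal MIR sketch of the shape of copy the new verifier check rejects. It is not the test body above (which is elided here); the function name, the register numbers, the V_MOV_B32 defs, and the duplicate def used to take the function out of SSA form are illustrative assumptions, not lines from the patch:

---
name: illegal_vgpr_to_sgpr_copy_sketch
tracksRegLiveness: true
body: |
  bb.0:
    ; Redefining %0 means the function is no longer in SSA form, so
    ; verifyInstruction applies the new check to the generic COPY below.
    %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
    ; Source is a VGPR, destination is an SGPR: the verifier should now
    ; flag this COPY as an illegal vector-register-to-SGPR copy.
    %1:sgpr_32 = COPY %0
    S_ENDPGM 0
...

Fed to llc with -run-pass=none, as in the RUN line above, the machine verifier should reject the COPY rather than letting the bad copy reach later passes.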