[llvm] bc27a31 - [AMDGPU] Fix copyPhysReg to not produce unaligned vgpr access
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 15 14:14:39 PDT 2021
Author: Stanislav Mekhanoshin
Date: 2021-03-15T14:14:30-07:00
New Revision: bc27a31801a3ec79e860aba9ac5df77a4d73e330
URL: https://github.com/llvm/llvm-project/commit/bc27a31801a3ec79e860aba9ac5df77a4d73e330
DIFF: https://github.com/llvm/llvm-project/commit/bc27a31801a3ec79e860aba9ac5df77a4d73e330.diff
LOG: [AMDGPU] Fix copyPhysReg to not produce unaligned vgpr access
RA can insert something like a sub1_sub2 COPY of a wide VGPR
tuple, which results in an unaligned access with v_pk_mov_b32
after the copy is expanded. This is a regression after D97316.
Differential Revision: https://reviews.llvm.org/D98549
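
For illustration, a minimal MIR sketch of the problematic case (adapted from
the copy_v64_to_v64_unaligned test added below, not part of the commit text
itself): the register allocator can form a copy into an odd-based VGPR pair,
which copyPhysReg previously expanded with v_pk_mov_b32 on subtargets that
require even-aligned VGPRs; with this change it falls back to per-register
v_mov_b32 moves instead:

  ; copy into an unaligned VGPR pair (vgpr1_vgpr2 is not even-aligned)
  $vgpr1_vgpr2 = COPY killed $vgpr2_vgpr3, implicit $exec
  ; expected expansion on gfx90a after this change (see the GFX90A checks below),
  ; i.e. no V_PK_MOV_B32 on the unaligned pair, just two 32-bit moves:
  $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $vgpr2_vgpr3
  $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3, implicit $exec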
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.h
llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4035ff1817c1..4c68247b07a4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -875,8 +875,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- if (RC->hasSuperClassEq(&AMDGPU::VReg_64RegClass) &&
- !RI.hasAGPRs(RI.getPhysRegClass(SrcReg))) {
+ const TargetRegisterClass *SrcRC = RI.getPhysRegClass(SrcReg);
+ if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
if (ST.hasPackedFP32Ops()) {
BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestReg)
.addImm(SISrcMods::OP_SEL_1)
@@ -895,7 +895,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
if (RI.isSGPRClass(RC)) {
- if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
+ if (!RI.isSGPRClass(SrcRC)) {
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
return;
}
@@ -906,12 +906,13 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned EltSize = 4;
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
if (RI.hasAGPRs(RC)) {
- Opcode = (RI.hasVGPRs(RI.getPhysRegClass(SrcReg))) ?
+ Opcode = (RI.hasVGPRs(SrcRC)) ?
AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
- } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(RI.getPhysRegClass(SrcReg))) {
+ } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(SrcRC)) {
Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
} else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
- !RI.hasAGPRs(RI.getPhysRegClass(SrcReg))) {
+ (RI.isProperlyAlignedRC(*RC) &&
+ (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
// TODO: In 96-bit case, could do a 64-bit mov and then a 32-bit mov.
if (ST.hasPackedFP32Ops()) {
Opcode = AMDGPU::V_PK_MOV_B32;
@@ -3831,10 +3832,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
// Check that this is the aligned version of the class.
- if (!RC || ((IsVGPR && !RC->hasSuperClassEq(RI.getVGPRClassForBitWidth(
- RI.getRegSizeInBits(*RC)))) ||
- (IsAGPR && !RC->hasSuperClassEq(RI.getAGPRClassForBitWidth(
- RI.getRegSizeInBits(*RC)))))) {
+ if (!RC || !RI.isProperlyAlignedRC(*RC)) {
ErrInfo = "Subtarget requires even aligned vector registers";
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 0f44182660a5..c6ecf2a66e9c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2338,6 +2338,18 @@ MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
return AMDGPU::NoRegister;
}
+bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
+ if (!ST.needsAlignedVGPRs())
+ return true;
+
+ if (hasVGPRs(&RC))
+ return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
+ if (hasAGPRs(&RC))
+ return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
+
+ return true;
+}
+
bool SIRegisterInfo::isConstantPhysReg(MCRegister PhysReg) const {
switch (PhysReg) {
case AMDGPU::SGPR_NULL:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index e3910dd9e0fc..5d360677bed6 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -326,6 +326,10 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
// \returns \p Reg otherwise.
MCPhysReg get32BitRegister(MCPhysReg Reg) const;
+ // Returns true if a given register class is properly aligned for
+ // the subtarget.
+ bool isProperlyAlignedRC(const TargetRegisterClass &RC) const;
+
/// Return all SGPR128 which satisfy the waves per execution unit requirement
/// of the subtarget.
ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const;
diff --git a/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir b/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir
index 462a0127a821..4f32a8d580da 100644
--- a/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir
+++ b/llvm/test/CodeGen/AMDGPU/copy_phys_vgpr64.mir
@@ -158,3 +158,193 @@ body: |
; GFX90A: $vgpr2_vgpr3 = V_PK_MOV_B32 8, $sgpr6_sgpr7, 12, $sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed $sgpr4_sgpr5_sgpr6_sgpr7
...
+
+---
+name: copy_v64_to_v64_unaligned
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr2_vgpr3
+ ; GFX908-LABEL: name: copy_v64_to_v64_unaligned
+ ; GFX908: liveins: $vgpr2_vgpr3
+ ; GFX908: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $vgpr2_vgpr3
+ ; GFX908: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3, implicit $exec
+ ; GFX90A-LABEL: name: copy_v64_to_v64_unaligned
+ ; GFX90A: liveins: $vgpr2_vgpr3
+ ; GFX90A: $vgpr1 = V_MOV_B32_e32 $vgpr2, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $vgpr2_vgpr3
+ ; GFX90A: $vgpr2 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit $vgpr2_vgpr3, implicit $exec
+ $vgpr1_vgpr2 = COPY killed $vgpr2_vgpr3, implicit $exec
+...
+
+---
+name: copy_v64_unaligned_to_v64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr3_vgpr4
+ ; GFX908-LABEL: name: copy_v64_unaligned_to_v64
+ ; GFX908: liveins: $vgpr3_vgpr4
+ ; GFX908: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr3_vgpr4
+ ; GFX908: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit killed $vgpr3_vgpr4, implicit $exec
+ ; GFX90A-LABEL: name: copy_v64_unaligned_to_v64
+ ; GFX90A: liveins: $vgpr3_vgpr4
+ ; GFX90A: $vgpr0 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr3_vgpr4
+ ; GFX90A: $vgpr1 = V_MOV_B32_e32 $vgpr4, implicit $exec, implicit killed $vgpr3_vgpr4, implicit $exec
+ $vgpr0_vgpr1 = COPY killed $vgpr3_vgpr4, implicit $exec
+...
+
+---
+name: copy_v128_to_v128_unaligned
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr8_vgpr9_vgpr10_vgpr11
+ ; GFX908-LABEL: name: copy_v128_to_v128_unaligned
+ ; GFX908: liveins: $vgpr8_vgpr9_vgpr10_vgpr11
+ ; GFX908: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+ ; GFX908: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+ ; GFX908: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+ ; GFX908: $vgpr4 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec
+ ; GFX90A-LABEL: name: copy_v128_to_v128_unaligned
+ ; GFX90A: liveins: $vgpr8_vgpr9_vgpr10_vgpr11
+ ; GFX90A: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+ ; GFX90A: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+ ; GFX90A: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit $vgpr8_vgpr9_vgpr10_vgpr11
+ ; GFX90A: $vgpr4 = V_MOV_B32_e32 $vgpr11, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec
+ $vgpr1_vgpr2_vgpr3_vgpr4 = COPY killed $vgpr8_vgpr9_vgpr10_vgpr11, implicit $exec
+...
+
+---
+name: copy_v128_unaligned_to_v128
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr7_vgpr8_vgpr9_vgpr10
+ ; GFX908-LABEL: name: copy_v128_unaligned_to_v128
+ ; GFX908: liveins: $vgpr7_vgpr8_vgpr9_vgpr10
+ ; GFX908: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+ ; GFX908: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+ ; GFX908: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+ ; GFX908: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec
+ ; GFX90A-LABEL: name: copy_v128_unaligned_to_v128
+ ; GFX90A: liveins: $vgpr7_vgpr8_vgpr9_vgpr10
+ ; GFX90A: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+ ; GFX90A: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+ ; GFX90A: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr7_vgpr8_vgpr9_vgpr10
+ ; GFX90A: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed $vgpr7_vgpr8_vgpr9_vgpr10, implicit $exec
+...
+
+---
+name: copy_s64_to_v64_unaligned
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr8_sgpr9
+ ; GFX908-LABEL: name: copy_s64_to_v64_unaligned
+ ; GFX908: liveins: $sgpr8_sgpr9
+ ; GFX908: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr8_sgpr9
+ ; GFX908: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit killed $sgpr8_sgpr9, implicit $exec
+ ; GFX90A-LABEL: name: copy_s64_to_v64_unaligned
+ ; GFX90A: liveins: $sgpr8_sgpr9
+ ; GFX90A: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr8_sgpr9
+ ; GFX90A: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit killed $sgpr8_sgpr9, implicit $exec
+ $vgpr1_vgpr2 = COPY killed $sgpr8_sgpr9, implicit $exec
+...
+
+---
+name: copy_s128_to_v128_unaligned
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX908-LABEL: name: copy_s128_to_v128_unaligned
+ ; GFX908: liveins: $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX908: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX908: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX908: $vgpr3 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX908: $vgpr4 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec
+ ; GFX90A-LABEL: name: copy_s128_to_v128_unaligned
+ ; GFX90A: liveins: $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX90A: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3_vgpr4, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX90A: $vgpr2 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX90A: $vgpr3 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+ ; GFX90A: $vgpr4 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec
+ $vgpr1_vgpr2_vgpr3_vgpr4 = COPY killed $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec
+...
+
+---
+name: copy_v96_to_v96_unaligned
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr8_vgpr9_vgpr10
+ ; GFX908-LABEL: name: copy_v96_to_v96_unaligned
+ ; GFX908: liveins: $vgpr8_vgpr9_vgpr10
+ ; GFX908: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $vgpr8_vgpr9_vgpr10
+ ; GFX908: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10
+ ; GFX908: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10, implicit $exec
+ ; GFX90A-LABEL: name: copy_v96_to_v96_unaligned
+ ; GFX90A: liveins: $vgpr8_vgpr9_vgpr10
+ ; GFX90A: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $vgpr8_vgpr9_vgpr10
+ ; GFX90A: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit $vgpr8_vgpr9_vgpr10
+ ; GFX90A: $vgpr3 = V_MOV_B32_e32 $vgpr10, implicit $exec, implicit killed $vgpr8_vgpr9_vgpr10, implicit $exec
+ $vgpr1_vgpr2_vgpr3 = COPY killed $vgpr8_vgpr9_vgpr10, implicit $exec
+...
+
+---
+name: copy_v96_unaligned_to_v96
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr7_vgpr8_vgpr9
+ ; GFX908-LABEL: name: copy_v96_unaligned_to_v96
+ ; GFX908: liveins: $vgpr7_vgpr8_vgpr9
+ ; GFX908: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr7_vgpr8_vgpr9
+ ; GFX908: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9
+ ; GFX908: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9, implicit $exec
+ ; GFX90A-LABEL: name: copy_v96_unaligned_to_v96
+ ; GFX90A: liveins: $vgpr7_vgpr8_vgpr9
+ ; GFX90A: $vgpr0 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr7_vgpr8_vgpr9
+ ; GFX90A: $vgpr1 = V_MOV_B32_e32 $vgpr8, implicit $exec, implicit $vgpr7_vgpr8_vgpr9
+ ; GFX90A: $vgpr2 = V_MOV_B32_e32 $vgpr9, implicit $exec, implicit killed $vgpr7_vgpr8_vgpr9, implicit $exec
+ $vgpr0_vgpr1_vgpr2 = COPY killed $vgpr7_vgpr8_vgpr9, implicit $exec
+...
+
+---
+name: copy_s96_to_v96
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2
+ ; GFX908-LABEL: name: copy_s96_to_v96
+ ; GFX908: liveins: $sgpr0_sgpr1_sgpr2
+ ; GFX908: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $sgpr0_sgpr1_sgpr2
+ ; GFX908: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
+ ; GFX908: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
+ ; GFX90A-LABEL: name: copy_s96_to_v96
+ ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2
+ ; GFX90A: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $sgpr0_sgpr1_sgpr2
+ ; GFX90A: $vgpr1 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
+ ; GFX90A: $vgpr2 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
+ $vgpr0_vgpr1_vgpr2 = COPY killed $sgpr0_sgpr1_sgpr2, implicit $exec
+...
+
+---
+name: copy_s96_to_v96_unaligned
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2
+ ; GFX908-LABEL: name: copy_s96_to_v96_unaligned
+ ; GFX908: liveins: $sgpr0_sgpr1_sgpr2
+ ; GFX908: $vgpr3 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $sgpr0_sgpr1_sgpr2
+ ; GFX908: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
+ ; GFX908: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
+ ; GFX90A-LABEL: name: copy_s96_to_v96_unaligned
+ ; GFX90A: liveins: $sgpr0_sgpr1_sgpr2
+ ; GFX90A: $vgpr3 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit-def $vgpr1_vgpr2_vgpr3, implicit $sgpr0_sgpr1_sgpr2
+ ; GFX90A: $vgpr2 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
+ ; GFX90A: $vgpr1 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
+ $vgpr1_vgpr2_vgpr3 = COPY killed $sgpr0_sgpr1_sgpr2, implicit $exec
+...